From 45d1750d030d6fead6838b41b34b3644d3b00753 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sun, 20 May 2007 15:31:02 +0300 Subject: [PATCH] Bug 914: Don't let UTF-8 I/O affect internal representations. Use it for the actual I/O only. Previously, defining CONFIG_UTF8 and enabling UTF-8 I/O used to force many strings to the UTF-8 charset regardless of the terminal charset option. Now, those strings always follow the terminal charset. This fixes bug 914 which was caused because _() returned strings in the terminal charset and functions then assumed they were in UTF-8. This reduction in the effects of UTF-8 I/O may also simplify future testing. --- src/bfu/button.c | 12 ++--- src/bfu/dialog.c | 10 ++--- src/bfu/group.c | 10 ++--- src/bfu/inpfield.c | 39 ++++++---------- src/bfu/listbox.c | 2 +- src/bfu/menu.c | 16 +++---- src/bfu/text.c | 4 +- src/bfu/widget.h | 9 ++-- src/dialogs/download.c | 4 +- src/dialogs/menu.c | 2 +- src/dialogs/status.c | 4 +- src/document/forms.h | 11 +++-- src/protocol/bittorrent/dialogs.c | 4 +- src/session/download.c | 2 +- src/session/session.c | 2 +- src/terminal/draw.c | 17 ++++--- src/terminal/draw.h | 10 ++--- src/terminal/event.c | 6 +-- src/terminal/screen.c | 75 ++++++++++++++++--------------- src/terminal/terminal.h | 2 +- src/viewer/text/form.c | 29 ++++-------- src/viewer/text/form.h | 11 +++-- src/viewer/text/link.c | 4 +- src/viewer/text/textarea.c | 2 +- 24 files changed, 133 insertions(+), 154 deletions(-) diff --git a/src/bfu/button.c b/src/bfu/button.c index e175c0e5..8267c945 100644 --- a/src/bfu/button.c +++ b/src/bfu/button.c @@ -130,7 +130,7 @@ dlg_format_buttons(struct terminal *term, mw = 0; #ifdef CONFIG_UTF8 buttons_width(widget_data1, i2 - i1 + 1, NULL, &mw, - term->utf8); + term->utf8_cp); #else buttons_width(widget_data1, i2 - i1 + 1, NULL, &mw); #endif /* CONFIG_UTF8 */ @@ -140,7 +140,7 @@ dlg_format_buttons(struct terminal *term, mw = 0; #ifdef CONFIG_UTF8 - buttons_width(widget_data1, i2 - i1, NULL,
&mw, term->utf8); + buttons_width(widget_data1, i2 - i1, NULL, &mw, term->utf8_cp); #else buttons_width(widget_data1, i2 - i1, NULL, &mw); #endif /* CONFIG_UTF8 */ @@ -156,7 +156,7 @@ dlg_format_buttons(struct terminal *term, for (i = i1; i < i2; i++) { #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) set_box(&widget_data[i].box, p, *y, utf8_ptr2cells(widget_data[i].widget->text, NULL) @@ -196,7 +196,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data) if (!color || !shortcut_color) return EVENT_PROCESSED; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { int button_left_len = utf8_ptr2cells(BUTTON_LEFT, NULL); int button_right_len = utf8_ptr2cells(BUTTON_RIGHT, NULL); @@ -222,7 +222,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data) ? SCREEN_ATTR_UNDERLINE : 0; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { if (hk_pos >= 0) { int hk_bytes = utf8charlen(&text[hk_pos+1]); int cells_to_hk = utf8_ptr2cells(text, @@ -286,7 +286,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data) } } #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { int text_cells = utf8_ptr2cells(widget_data->widget->text, NULL); int hk = (widget_data->widget->info.button.hotkey_pos >= 0); diff --git a/src/bfu/dialog.c b/src/bfu/dialog.c index f6ad5299..6694730d 100644 --- a/src/bfu/dialog.c +++ b/src/bfu/dialog.c @@ -102,7 +102,7 @@ redraw_dialog(struct dialog_data *dlg_data, int layout) int x, y; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) titlecells = utf8_ptr2cells(title, &title[titlelen]); #endif /* CONFIG_UTF8 */ @@ -110,7 +110,7 @@ redraw_dialog(struct dialog_data *dlg_data, int layout) titlecells = int_min(box.width - 2, titlecells); #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) titlelen = utf8_cells2bytes(title, titlecells, NULL); #endif /* CONFIG_UTF8 */ @@ -619,7 +619,7 @@ generic_dialog_layouter(struct dialog_data *dlg_data) 
int x = 0, y, rw; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) rw = int_min(w, utf8_ptr2cells(dlg_data->dlg->title, NULL)); else #endif /* CONFIG_UTF8 */ @@ -664,12 +664,12 @@ draw_dialog(struct dialog_data *dlg_data, int width, int height) draw_shadow(term, &dlg_data->box, get_bfu_color(term, "dialog.shadow"), 2, 1); #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) fix_dwchar_around_box(term, &dlg_data->box, 0, 2, 1); #endif /* CONFIG_UTF8 */ } #ifdef CONFIG_UTF8 - else if (term->utf8) + else if (term->utf8_cp) fix_dwchar_around_box(term, &dlg_data->box, 0, 0, 0); #endif /* CONFIG_UTF8 */ } diff --git a/src/bfu/group.c b/src/bfu/group.c index 3d34e7c7..cc4c6d8d 100644 --- a/src/bfu/group.c +++ b/src/bfu/group.c @@ -40,7 +40,7 @@ dlg_format_group(struct terminal *term, int label_padding; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { if (text && *text) label_length = utf8_ptr2cells(text, NULL); else @@ -55,7 +55,7 @@ dlg_format_group(struct terminal *term, width = CHECKBOX_LEN; } else if (widget_is_textfield(widget_data)) { #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { width = utf8_ptr2cells(widget_data->widget->data, NULL); } else @@ -82,7 +82,7 @@ dlg_format_group(struct terminal *term, /* Draw text at right of checkbox. */ if (label_length) { #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { int lb = utf8_cells2bytes( text, label_length, @@ -108,7 +108,7 @@ dlg_format_group(struct terminal *term, /* Draw label at left of widget. 
*/ if (label_length) { #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { int lb = utf8_cells2bytes( text, label_length, @@ -149,7 +149,7 @@ group_layouter(struct dialog_data *dlg_data) int n = dlg_data->number_of_widgets - 2; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) rw = int_min(w, utf8_ptr2cells(dlg_data->dlg->title, NULL)); else #endif /* CONFIG_UTF8 */ diff --git a/src/bfu/inpfield.c b/src/bfu/inpfield.c index ed6c8590..cab51d86 100644 --- a/src/bfu/inpfield.c +++ b/src/bfu/inpfield.c @@ -271,7 +271,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data, #endif /* CONFIG_UTF8 */ #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { unsigned char *t = widget_data->cdata; int p = widget_data->info.field.cpos; @@ -298,9 +298,9 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data, int len, w; #ifdef CONFIG_UTF8 - if (term->utf8 && !hide) + if (term->utf8_cp && !hide) len = utf8_ptr2cells(text, NULL); - else if (term->utf8) + else if (term->utf8_cp) len = utf8_ptr2chars(text, NULL); else #endif /* CONFIG_UTF8 */ @@ -309,7 +309,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data, if (!hide) { #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) w = utf8_cells2bytes(text, w, NULL); #endif /* CONFIG_UTF8 */ draw_text(term, widget_data->box.x, widget_data->box.y, @@ -328,7 +328,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data, int x; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) x = widget_data->box.x + len - left; else #endif /* CONFIG_UTF8 */ @@ -474,7 +474,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) case ACT_EDIT_RIGHT: if (widget_data->info.field.cpos < strlen(widget_data->cdata)) { #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { unsigned char *next = widget_data->cdata + widget_data->info.field.cpos; unsigned char *end = strchr(next, '\0'); @@ 
-492,7 +492,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) if (widget_data->info.field.cpos > 0) widget_data->info.field.cpos--; #ifdef CONFIG_UTF8 - if (widget_data->info.field.cpos && term->utf8) { + if (widget_data->info.field.cpos && term->utf8_cp) { unsigned char *t = widget_data->cdata; unsigned char *t2 = t; int p = widget_data->info.field.cpos; @@ -517,7 +517,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) case ACT_EDIT_BACKSPACE: #ifdef CONFIG_UTF8 - if (widget_data->info.field.cpos && term->utf8) { + if (widget_data->info.field.cpos && term->utf8_cp) { /* XXX: stolen from src/viewer/text/form.c */ /* FIXME: This isn't nice. We remove last byte * from UTF-8 character to detect @@ -559,7 +559,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) if (widget_data->info.field.cpos >= cdata_len) goto display_field; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { unsigned char *end = widget_data->cdata + cdata_len; unsigned char *text = widget_data->cdata + widget_data->info.field.cpos; unsigned char *old = text; @@ -688,22 +688,11 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) const unsigned char *ins; int inslen; - if (term->utf8) { - /* get_kbd_key(ev) is in UCS-4, - * and @text is in UTF-8. */ - ins = encode_utf8(get_kbd_key(ev)); - /* get_kbd_key(ev) cannot be L'\0' - * because @check_kbd_textinput_key - * would have rejected it. So it - * is OK to use @strlen below. */ - } else { - /* get_kbd_key(ev) is UCS-4, and @text - * is in the terminal's charset. */ - int cp = get_opt_codepage_tree(term->spec, - "charset"); - - ins = u2cp_no_nbsp(get_kbd_key(ev), cp); - } + /* get_kbd_key(ev) is UCS-4, and @text + * is in the terminal's charset. 
*/ + ins = u2cp_no_nbsp(get_kbd_key(ev), + get_opt_codepage_tree(term->spec, + "charset")); inslen = strlen(ins); #endif /* CONFIG_UTF8 */ diff --git a/src/bfu/listbox.c b/src/bfu/listbox.c index 0ad32a98..0b56109a 100644 --- a/src/bfu/listbox.c +++ b/src/bfu/listbox.c @@ -463,7 +463,7 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset) len = strlen(text); int_upper_bound(&len, int_max(0, data->widget_data->box.width - depth * 5)); #ifdef CONFIG_UTF8 - if (data->term->utf8) + if (data->term->utf8_cp) len_bytes = utf8_cells2bytes(text, len, NULL); else #endif /* CONFIG_UTF8 */ diff --git a/src/bfu/menu.c b/src/bfu/menu.c index f7295554..74b60d70 100644 --- a/src/bfu/menu.c +++ b/src/bfu/menu.c @@ -201,7 +201,7 @@ get_menuitem_text_width(struct terminal *term, struct menu_item *mi) if (!text[0]) return 0; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) return L_TEXT_SPACE + utf8_ptr2cells(text, NULL) - !!mi->hotkey_pos + R_TEXT_SPACE; else @@ -383,7 +383,7 @@ draw_menu_left_text(struct terminal *term, unsigned char *text, int len, if (!len) return; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { max_len = utf8_cells2bytes(text, w, NULL); if (max_len <= 0) return; @@ -431,7 +431,7 @@ draw_menu_left_text_hk(struct terminal *term, unsigned char *text, } #ifdef CONFIG_UTF8 - if (term->utf8) goto utf8; + if (term->utf8_cp) goto utf8; #endif /* CONFIG_UTF8 */ for (x = 0; x - !!hk_state < w && (c = text[x]); x++) { @@ -561,12 +561,12 @@ display_menu(struct terminal *term, struct menu *menu) draw_shadow(term, &menu->box, get_bfu_color(term, "dialog.shadow"), 2, 1); #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) fix_dwchar_around_box(term, &box, 1, 2, 1); #endif /* CONFIG_UTF8 */ } #ifdef CONFIG_UTF8 - else if (term->utf8) + else if (term->utf8_cp) fix_dwchar_around_box(term, &box, 1, 0, 0); #endif /* CONFIG_UTF8 */ @@ -1115,7 +1115,7 @@ display_mainmenu(struct terminal *term, struct menu *menu) textlen = 
strlen(text) - !!l; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) screencnt = utf8_ptr2cells(text, NULL) - !!l; else #endif /* CONFIG_UTF8 */ @@ -1125,7 +1125,7 @@ display_mainmenu(struct terminal *term, struct menu *menu) color = selected_color; box.x = p; #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) box.width = L_MAINTEXT_SPACE + L_TEXT_SPACE + screencnt + R_TEXT_SPACE + R_MAINTEXT_SPACE; @@ -1164,7 +1164,7 @@ display_mainmenu(struct terminal *term, struct menu *menu) int_lower_bound(&menu->last, menu->first); if (menu->last < menu->size - 1) { #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { struct screen_char *schar; schar = get_char(term, term->width - R_MAINMENU_SPACE, 0); diff --git a/src/bfu/text.c b/src/bfu/text.c index f5e3f1b7..e8be0191 100644 --- a/src/bfu/text.c +++ b/src/bfu/text.c @@ -231,7 +231,7 @@ dlg_format_text_do(struct terminal *term, unsigned char *text, if (!*text) break; #ifdef CONFIG_UTF8 - line_width = split_line(text, width, &cells, term->utf8); + line_width = split_line(text, width, &cells, term->utf8_cp); #else line_width = split_line(text, width, &cells); #endif /* CONFIG_UTF8 */ @@ -290,7 +290,7 @@ dlg_format_text(struct terminal *term, struct widget_data *widget_data, * split if we don't have to */ #ifdef CONFIG_UTF8 if (widget_data->box.width != width - && !split_lines(widget_data, width, term->utf8)) + && !split_lines(widget_data, width, term->utf8_cp)) return; #else if (widget_data->box.width != width diff --git a/src/bfu/widget.h b/src/bfu/widget.h index 5e7d1814..68496326 100644 --- a/src/bfu/widget.h +++ b/src/bfu/widget.h @@ -52,11 +52,10 @@ struct widget { struct widget_data { struct widget *widget; - /* For WIDGET_FIELD: If CONFIG_UTF8 is defined and UTF-8 I/O - * is enabled for the terminal, then @cdata is in UTF-8; - * otherwise, @cdata is in the charset of the terminal, and - * the charset is assumed to be unibyte. 
(Thus, if you choose - * UTF-8 as the charset but disable UTF-8 I/O, you lose.) + /* For WIDGET_FIELD: @cdata is in the charset of the terminal. + * (That charset can be UTF-8 only if CONFIG_UTF8 is defined, + * and is assumed to be unibyte otherwise.) The UTF-8 I/O + * option has no effect here. * * For WIDGET_TEXT: @cdata is cast from/to an unsigned char ** * that points to the first element of an array. Each element diff --git a/src/dialogs/download.c b/src/dialogs/download.c index 2d177b39..825e0b53 100644 --- a/src/dialogs/download.c +++ b/src/dialogs/download.c @@ -146,7 +146,7 @@ download_dialog_layouter(struct dialog_data *dlg_data) return; } #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) decode_uri(url); else #endif /* CONFIG_UTF8 */ @@ -303,7 +303,7 @@ get_file_download_text(struct listbox_item *item, struct terminal *term) uristring = get_uri_string(file_download->uri, URI_PUBLIC); if (uristring) { #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) decode_uri(uristring); else #endif /* CONFIG_UTF8 */ diff --git a/src/dialogs/menu.c b/src/dialogs/menu.c index 615e66e3..0707b980 100644 --- a/src/dialogs/menu.c +++ b/src/dialogs/menu.c @@ -586,7 +586,7 @@ query_file(struct session *ses, struct uri *uri, void *data, /* Remove the %-ugliness for display */ #ifdef CONFIG_UTF8 - if (ses->tab->term->utf8) + if (ses->tab->term->utf8_cp) decode_uri_string(&def); else #endif /* CONFIG_UTF8 */ diff --git a/src/dialogs/status.c b/src/dialogs/status.c index 84a2c1c0..cfe14609 100644 --- a/src/dialogs/status.c +++ b/src/dialogs/status.c @@ -427,7 +427,7 @@ display_title_bar(struct session *ses, struct terminal *term) int titlelen, titlewidth; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { titlewidth = utf8_ptr2cells(document->title, NULL); titlewidth = int_min(titlewidth, maxlen); @@ -452,7 +452,7 @@ display_title_bar(struct session *ses, struct terminal *term) if (title.length) { int x; #ifdef CONFIG_UTF8 - if (term->utf8) { + if 
(term->utf8_cp) { x = int_max(term->width - 1 - utf8_ptr2cells(title.source, title.source diff --git a/src/document/forms.h b/src/document/forms.h index 17449892..0762fa5c 100644 --- a/src/document/forms.h +++ b/src/document/forms.h @@ -98,12 +98,11 @@ struct form_control { int maxlength; int nvalues; unsigned char **values; - /* For FC_SELECT: If CONFIG_UTF8 is defined and UTF-8 I/O is - * enabled for the terminal, then @labels is in UTF-8; - * otherwise, @labels is in the charset of the terminal, and - * the charset is assumed to be unibyte. (Thus, if you choose - * UTF-8 as the charset but disable UTF-8 I/O, you lose.) The - * charset of the document has no effect here. */ + /* For FC_SELECT: @labels are in the charset of the terminal. + * (That charset can be UTF-8 only if CONFIG_UTF8 is defined, + * and is assumed to be unibyte otherwise.) The charset of + * the document and the UTF-8 I/O option have no effect + * here. */ unsigned char **labels; struct menu_item *menu; }; diff --git a/src/protocol/bittorrent/dialogs.c b/src/protocol/bittorrent/dialogs.c index 5001c5e0..365c5666 100644 --- a/src/protocol/bittorrent/dialogs.c +++ b/src/protocol/bittorrent/dialogs.c @@ -575,7 +575,7 @@ bittorrent_message_dialog(struct session *ses, void *data) uristring = get_uri_string(message->uri, URI_PUBLIC); if (uristring) { #ifdef CONFIG_UTF8 - if (ses->tab->term->utf8) + if (ses->tab->term->utf8_cp) decode_uri(uristring); else #endif /* CONFIG_UTF8 */ @@ -725,7 +725,7 @@ bittorrent_query_callback(void *data, enum connection_state state, /* Let's make the filename pretty for display & save */ /* TODO: The filename can be the empty string here. See bug 396. 
*/ #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) decode_uri_string(&filename); else #endif /* CONFIG_UTF8 */ diff --git a/src/session/download.c b/src/session/download.c index a928bf2d..3f341212 100644 --- a/src/session/download.c +++ b/src/session/download.c @@ -1158,7 +1158,7 @@ do_type_query(struct type_query *type_query, unsigned char *ct, struct mime_hand /* Let's make the filename pretty for display & save */ /* TODO: The filename can be the empty string here. See bug 396. */ #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) decode_uri_string(&filename); else #endif /* CONFIG_UTF8 */ diff --git a/src/session/session.c b/src/session/session.c index 04d3f7c6..d0d4e81c 100644 --- a/src/session/session.c +++ b/src/session/session.c @@ -267,7 +267,7 @@ print_error_dialog(struct session *ses, enum connection_state state, uristring = uri ? get_uri_string(uri, URI_PUBLIC) : NULL; if (uristring) { #ifdef CONFIG_UTF8 - if (ses->tab->term->utf8) + if (ses->tab->term->utf8_cp) decode_uri(uristring); else #endif /* CONFIG_UTF8 */ diff --git a/src/terminal/draw.c b/src/terminal/draw.c index cf72dbff..dae5d473 100644 --- a/src/terminal/draw.c +++ b/src/terminal/draw.c @@ -104,6 +104,9 @@ draw_char_color(struct terminal *term, int x, int y, struct color_pair *color) set_screen_dirty(term->screen, y, y); } +/* The data parameter here is like screen_char.data: UCS-4 if the + * charset of the terminal is UTF-8 (possible only if CONFIG_UTF8 is + * defined), and a byte otherwise. */ void #ifdef CONFIG_UTF8 draw_char_data(struct terminal *term, int x, int y, unicode_val_T data) @@ -120,10 +123,10 @@ draw_char_data(struct terminal *term, int x, int y, unsigned char data) #ifdef CONFIG_UTF8 #ifdef CONFIG_DEBUG /* Detect attempt to draw double-width char on the last - * column of terminal. 
The unicode_to_cell(data) call - * is in principle wrong if CONFIG_UTF8 is defined but - * UTF-8 I/O is disabled, because @data is then a byte - * in the charset of the terminal; but unicode_to_cell + * column of terminal. The unicode_to_cell(data) call is + * in principle wrong if CONFIG_UTF8 is defined but the + * charset of the terminal is not UTF-8, because @data + * is then a byte in that charset; but unicode_to_cell * returns 1 for U+0000...U+00FF so it's not a problem. */ if (unicode_to_cell(data) == 2 && x + 1 > term->width) INTERNAL("Attempt to draw double-width glyph on last column!"); @@ -152,7 +155,7 @@ draw_line(struct terminal *term, int x, int y, int l, struct screen_char *line) if (size == 0) return; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { struct screen_char *sc; if (line[0].data == UCS_NO_CHAR && x == 0) { @@ -272,7 +275,7 @@ fix_dwchar_around_box(struct terminal *term, struct box *box, int border, struct screen_char *schar; int height, x, y; - if (!term->utf8) + if (!term->utf8_cp) return; /* 1 */ @@ -497,7 +500,7 @@ draw_text(struct terminal *term, int x, int y, if_assert_failed return; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { draw_text_utf8(term, x, y, text, length, attr, color); return; } diff --git a/src/terminal/draw.h b/src/terminal/draw.h index 7d3927cf..83405a28 100644 --- a/src/terminal/draw.h +++ b/src/terminal/draw.h @@ -33,11 +33,11 @@ struct screen_char { /* Contains either character value or frame data. * If @attr includes SCREEN_ATTR_FRAME, then @data is enum * border_char; otherwise, @data is a character value. - * If CONFIG_UTF8 is defined, and UTF-8 I/O is enabled for the - * terminal, then the character value is in UCS-4; otherwise, - * it is in the charset of the terminal, and the charset is - * assumed to be unibyte. (Thus, if you choose UTF-8 as the - * charset but disable UTF-8 I/O, you lose.) 
*/ + * If the charset of the terminal is UTF-8 (which is possible + * only if CONFIG_UTF8 is defined), then the character value + * is in UCS-4; otherwise, the charset is assumed to be + * unibyte, and the character value is a byte in that + * charset. */ #ifdef CONFIG_UTF8 unicode_val_T data; #else diff --git a/src/terminal/event.c b/src/terminal/event.c index 2b715e07..5922727d 100644 --- a/src/terminal/event.c +++ b/src/terminal/event.c @@ -187,7 +187,7 @@ check_terminal_name(struct terminal *term, struct terminal_info *info) /* Force UTF-8 I/O if the UTF-8 charset is selected. Various * places assume that the terminal's charset is unibyte if * UTF-8 I/O is disabled. (bug 827) */ - term->utf8 = term->utf8_cp + term->utf8_io = term->utf8_cp || get_opt_bool_tree(term->spec, "utf_8_io"); #endif /* CONFIG_UTF8 */ } @@ -304,13 +304,13 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) #ifdef CONFIG_UTF8 /* struct term_event_keyboard carries UCS-4. * - If the "utf_8_io" option is true or the "charset" - * option refers to UTF-8, then term->utf8 is true, + * option refers to UTF-8, then term->utf8_io is true, * and handle_interlink_event() converts from UTF-8 * to UCS-4. * - Otherwise, handle_interlink_event() converts from * the codepage specified with the "charset" option * to UCS-4. */ - utf8_io = term->utf8; + utf8_io = term->utf8_io; #else /* struct term_event_keyboard carries bytes in the * charset of the terminal. diff --git a/src/terminal/screen.c b/src/terminal/screen.c index d5acf2ef..a8bc6d0d 100644 --- a/src/terminal/screen.c +++ b/src/terminal/screen.c @@ -326,7 +326,15 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec) driver->opt.charsets[1] = get_cp_index("koi8-r"); } else { - driver->opt.charsets[1] = driver->opt.charsets[0]; +#ifdef CONFIG_UTF8 + /* Don't let driver->opt.charsets[1] become + * UTF-8, because it is passed to cp2u(), + * which supports only unibyte characters. 
*/ + if (driver->opt.utf8_cp) + driver->opt.charsets[1] = get_cp_index("US-ASCII"); + else +#endif /* CONFIG_UTF8 */ + driver->opt.charsets[1] = driver->opt.charsets[0]; } } else { /* !utf8_io */ @@ -385,7 +393,7 @@ add_screen_driver(enum term_mode_type type, struct terminal *term, int env_len) term->spec->change_hook = screen_driver_change_hook; #ifdef CONFIG_UTF8 - term->utf8 = use_utf8_io(driver); + term->utf8_io = use_utf8_io(driver); term->utf8_cp = driver->opt.utf8_cp; #endif /* CONFIG_UTF8 */ @@ -408,7 +416,7 @@ get_screen_driver(struct terminal *term) move_to_top_of_list(active_screen_drivers, driver); #ifdef CONFIG_UTF8 - term->utf8 = use_utf8_io(driver); + term->utf8_io = use_utf8_io(driver); term->utf8_cp = driver->opt.utf8_cp; #endif /* CONFIG_UTF8 */ return driver; @@ -563,47 +571,42 @@ add_char_data(struct string *screen, struct screen_driver *driver, unsigned char data, unsigned char border) #endif /* !CONFIG_UTF8 */ { - /* CONFIG_UTF8 use_utf8_io border data add_to_string - * ----------- ----------- ------ ---------------- ---------------- - * not defined 0 0 terminal unibyte terminal unibyte - * not defined 0 1 enum border_char border unibyte - * not defined 1 0 terminal unibyte UTF-8 - * not defined 1 1 enum border_char UTF-8 - * defined 0 0 terminal unibyte terminal unibyte - * defined 0 1 enum border_char border unibyte - * defined 1 0 UTF-32 UTF-8 - * defined 1 1 enum border_char UTF-8 + /* charset use_utf8_io border data add_to_string + * ------- ----------- ------ ---------------- ---------------- + * unibyte 0 0 terminal unibyte terminal unibyte + * unibyte 0 1 enum border_char border unibyte + * unibyte 1 0 terminal unibyte UTF-8 + * unibyte 1 1 enum border_char UTF-8 + * UTF-8 1 0 UTF-32 (*) UTF-8 + * UTF-8 1 1 enum border_char UTF-8 * - * For "UTF-32" above, data can also be UCS_NO_CHAR, + * (*) For "UTF-32" above, data can also be UCS_NO_CHAR, * in which case this function must not alter *screen. 
*/ if (border && driver->opt.frame && data >= 176 && data < 224) data = driver->opt.frame[data - 176]; - if (use_utf8_io(driver)) { #ifdef CONFIG_UTF8 + if (driver->opt.utf8_cp) { if (border) { - int charset = driver->opt.charsets[!!border]; - - add_to_string(screen, cp2utf8(charset, - (unsigned char) data)); - } else { - if (data == UCS_NO_CHAR) - return; - - if (!isscreensafe_ucs(data)) - data = UCS_SPACE; - add_to_string(screen, encode_utf8(data)); + data = cp2u(driver->opt.charsets[1], + (unsigned char) data); } -#else /* !CONFIG_UTF8 */ + if (data == UCS_NO_CHAR) + return; + if (!isscreensafe_ucs(data)) + data = UCS_SPACE; + add_to_string(screen, encode_utf8(data)); + } else +#endif /* CONFIG_UTF8 */ + if (use_utf8_io(driver)) { int charset = driver->opt.charsets[!!border]; if (border || isscreensafe(data)) add_to_string(screen, cp2utf8(charset, data)); else /* UCS_SPACE <= 0x7F and so fits in one UTF-8 byte */ add_char_to_string(screen, UCS_SPACE); -#endif /* !CONFIG_UTF8 */ } else { if (border || isscreensafe(data)) add_char_to_string(screen, (unsigned char)data); @@ -623,7 +626,7 @@ add_char16(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ border != state->border && driver->opt.frame_seqs ) { @@ -633,7 +636,7 @@ add_char16(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ underline != state->underline && driver->opt.underline ) { @@ -643,7 +646,7 @@ add_char16(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ bold != state->bold ) { @@ -658,7 +661,7 @@ add_char16(struct string 
*screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ !compare_color_16(ch->color, state->color) ) { @@ -773,7 +776,7 @@ add_char256(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ attr_delta ) { @@ -801,7 +804,7 @@ add_char256(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ !compare_color_256(ch->color, state->color) ) { @@ -888,7 +891,7 @@ add_char_true(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ attr_delta ) { @@ -916,7 +919,7 @@ add_char_true(struct string *screen, struct screen_driver *driver, if ( #ifdef CONFIG_UTF8 - (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) && + !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) && #endif /* CONFIG_UTF8 */ !compare_color_true(ch->color, state->color) ) { diff --git a/src/terminal/terminal.h b/src/terminal/terminal.h index 933d4cd3..d95da99a 100644 --- a/src/terminal/terminal.h +++ b/src/terminal/terminal.h @@ -132,7 +132,7 @@ struct terminal { /* Indicates whether UTF-8 I/O is used. Forced on if the * UTF-8 charset is selected. (bug 827) */ - unsigned int utf8:1; + unsigned int utf8_io:1; #endif /* CONFIG_UTF8 */ /* The current tab number. 
*/ diff --git a/src/viewer/text/form.c b/src/viewer/text/form.c index 3b35391a..990e5ab1 100644 --- a/src/viewer/text/form.c +++ b/src/viewer/text/form.c @@ -161,12 +161,7 @@ init_form_state(struct document_view *doc_view, doc_cp = doc_view->document->cp; term = doc_view->session->tab->term; -#ifdef CONFIG_UTF8 - if (term->utf8) - viewer_cp = get_cp_index("UTF-8"); - else -#endif - viewer_cp = get_opt_codepage_tree(term->spec, "charset"); + viewer_cp = get_opt_codepage_tree(term->spec, "charset"); mem_free_set(&fs->value, NULL); @@ -384,7 +379,7 @@ draw_form_entry(struct terminal *term, struct document_view *doc_view, x = link->points[0].x + dx; #ifdef CONFIG_UTF8 - if (term->utf8) goto utf8; + if (term->utf8_cp) goto utf8; #endif /* CONFIG_UTF8 */ int_bounds(&fs->vpos, fs->state - fc->size + 1, fs->state); len = strlen(fs->value) - fs->vpos; @@ -540,7 +535,7 @@ drew_char: /* XXX: when can this happen? --pasky */ s = ""; #ifdef CONFIG_UTF8 - if (term->utf8) goto utf8_select; + if (term->utf8_cp) goto utf8_select; #endif /* CONFIG_UTF8 */ len = s ? strlen(s) : 0; for (i = 0; i < link->npoints; i++) { @@ -1379,7 +1374,7 @@ field_op(struct session *ses, struct document_view *doc_view, enum frame_event_status status = FRAME_EVENT_REFRESH; #ifdef CONFIG_UTF8 const unsigned char *ctext; - int utf8 = ses->tab->term->utf8; + int utf8 = ses->tab->term->utf8_cp; #endif /* CONFIG_UTF8 */ assert(ses && doc_view && link && ev); @@ -1766,18 +1761,10 @@ field_op(struct session *ses, struct document_view *doc_view, } #ifdef CONFIG_UTF8 - if (ses->tab->term->utf8) { - /* fs->value is in UTF-8 regardless of - * the charset of the terminal. */ - ctext = encode_utf8(get_kbd_key(ev)); - } else { - /* fs->value is in the charset of the - * terminal. */ - int cp = get_opt_codepage_tree(ses->tab->term->spec, - "charset"); - - ctext = u2cp_no_nbsp(get_kbd_key(ev), cp); - } + /* fs->value is in the charset of the terminal. 
*/ + ctext = u2cp_no_nbsp(get_kbd_key(ev), + get_opt_codepage_tree(ses->tab->term->spec, + "charset")); length = strlen(ctext); if (strlen(fs->value) + length > fc->maxlength diff --git a/src/viewer/text/form.h b/src/viewer/text/form.h index 7d1eff6e..d507d5c5 100644 --- a/src/viewer/text/form.h +++ b/src/viewer/text/form.h @@ -44,12 +44,11 @@ struct form_state { * string always requires calling realloc(). The string is * not normally allowed to grow past @form_control.maxlength * bytes (not counting the null), but there may be ways to get - * longer strings. If CONFIG_UTF8 is defined and UTF-8 I/O is - * enabled for the terminal, then @value is in UTF-8; - * otherwise, @value is in the charset of the terminal, and - * the charset is assumed to be unibyte. (Thus, if you choose - * UTF-8 as the charset but disable UTF-8 I/O, you lose.) - * The charset of the document has no effect here. */ + * longer strings. The string is in the charset of the + * terminal (which can be UTF-8 only if CONFIG_UTF8 is + * defined, and is assumed to be unibyte otherwise). The + * charset of the document and the UTF-8 I/O option have no + * effect here. */ unsigned char *value; /* For FC_TEXT, FC_PASSWORD, and FC_FILE, @state is the byte * position of the insertion point in @value. diff --git a/src/viewer/text/link.c b/src/viewer/text/link.c index 2c500074..9970dd23 100644 --- a/src/viewer/text/link.c +++ b/src/viewer/text/link.c @@ -117,7 +117,7 @@ get_link_cursor_offset(struct document_view *doc_view, struct link *link) #ifdef CONFIG_UTF8 /* The encoding of form fields depends on the terminal, * rather than on the document. 
*/ - int utf8 = doc_view->session->tab->term->utf8; + int utf8 = doc_view->session->tab->term->utf8_cp; #endif /* CONFIG_UTF8 */ switch (link->type) { @@ -1465,7 +1465,7 @@ get_current_link_info(struct session *ses, struct document_view *doc_view) } #ifdef CONFIG_UTF8 - if (term->utf8) + if (term->utf8_cp) decode_uri_string(&str); else #endif /* CONFIG_UTF8 */ diff --git a/src/viewer/text/textarea.c b/src/viewer/text/textarea.c index 14796e34..a6b739ff 100644 --- a/src/viewer/text/textarea.c +++ b/src/viewer/text/textarea.c @@ -413,7 +413,7 @@ draw_textarea(struct terminal *term, struct form_state *fs, if_assert_failed return; #ifdef CONFIG_UTF8 - if (term->utf8) { + if (term->utf8_cp) { draw_textarea_utf8(term, fs, doc_view, link); return; }