Bug 914: Don't let UTF-8 I/O affect internal representations.

Use UTF-8 I/O for the actual I/O only. Previously, defining CONFIG_UTF8 and enabling UTF-8 I/O forced many strings into the UTF-8 charset regardless of the terminal charset option. Now, those strings always follow the terminal charset. This fixes bug 914, which occurred because _() returned strings in the terminal charset while the functions receiving them assumed they were in UTF-8. This reduction in the effects of UTF-8 I/O may also simplify future testing.
2025-06-30 22:19:29 -04:00 · 2007-05-20 15:31:02 +03:00 · 2007-05-20 15:31:02 +03:00 · 45d1750d03
commit 45d1750d03
parent 36287949a0
24 changed files with 133 additions and 154 deletions
--- a/src/bfu/button.c
+++ b/src/bfu/button.c
@ -130,7 +130,7 @@ dlg_format_buttons(struct terminal *term,
 			mw = 0;
 #ifdef CONFIG_UTF8
 			buttons_width(widget_data1, i2 - i1 + 1, NULL, &mw,
-				      term->utf8);
+				      term->utf8_cp);
 #else
 			buttons_width(widget_data1, i2 - i1 + 1, NULL, &mw);
 #endif /* CONFIG_UTF8 */
@ -140,7 +140,7 @@ dlg_format_buttons(struct terminal *term,

 		mw = 0;
 #ifdef CONFIG_UTF8
-		buttons_width(widget_data1, i2 - i1, NULL, &mw, term->utf8);
+		buttons_width(widget_data1, i2 - i1, NULL, &mw, term->utf8_cp);
 #else
 		buttons_width(widget_data1, i2 - i1, NULL, &mw);
 #endif /* CONFIG_UTF8 */
@ -156,7 +156,7 @@ dlg_format_buttons(struct terminal *term,

 			for (i = i1; i < i2; i++) {
 #ifdef CONFIG_UTF8
-				if (term->utf8)
+				if (term->utf8_cp)
 					set_box(&widget_data[i].box,
 						p, *y,
 						utf8_ptr2cells(widget_data[i].widget->text, NULL)
@ -196,7 +196,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data)
 	if (!color || !shortcut_color) return EVENT_PROCESSED;

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		int button_left_len = utf8_ptr2cells(BUTTON_LEFT, NULL);
 		int button_right_len = utf8_ptr2cells(BUTTON_RIGHT, NULL);

@ -222,7 +222,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data)
 		     ? SCREEN_ATTR_UNDERLINE : 0;

 #ifdef CONFIG_UTF8
-		if (term->utf8) {
+		if (term->utf8_cp) {
 			if (hk_pos >= 0) {
 				int hk_bytes = utf8charlen(&text[hk_pos+1]);
 				int cells_to_hk = utf8_ptr2cells(text,
@ -286,7 +286,7 @@ display_button(struct dialog_data *dlg_data, struct widget_data *widget_data)
 		}
 	}
 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		int text_cells = utf8_ptr2cells(widget_data->widget->text, NULL);
 		int hk = (widget_data->widget->info.button.hotkey_pos >= 0);

--- a/src/bfu/dialog.c
+++ b/src/bfu/dialog.c
@ -102,7 +102,7 @@ redraw_dialog(struct dialog_data *dlg_data, int layout)
 			int x, y;

 #ifdef CONFIG_UTF8
-			if (term->utf8)
+			if (term->utf8_cp)
 				titlecells = utf8_ptr2cells(title,
 							    &title[titlelen]);
 #endif /* CONFIG_UTF8 */
@ -110,7 +110,7 @@ redraw_dialog(struct dialog_data *dlg_data, int layout)
 			titlecells = int_min(box.width - 2, titlecells);

 #ifdef CONFIG_UTF8
-			if (term->utf8)
+			if (term->utf8_cp)
 				titlelen = utf8_cells2bytes(title, titlecells,
 							    NULL);
 #endif /* CONFIG_UTF8 */
@ -619,7 +619,7 @@ generic_dialog_layouter(struct dialog_data *dlg_data)
 	int x = 0, y, rw;

 #ifdef CONFIG_UTF8
-	if (term->utf8)
+	if (term->utf8_cp)
 		rw = int_min(w, utf8_ptr2cells(dlg_data->dlg->title, NULL));
 	else
 #endif /* CONFIG_UTF8 */
@ -664,12 +664,12 @@ draw_dialog(struct dialog_data *dlg_data, int width, int height)
 		draw_shadow(term, &dlg_data->box,
 			    get_bfu_color(term, "dialog.shadow"), 2, 1);
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			fix_dwchar_around_box(term, &dlg_data->box, 0, 2, 1);
 #endif /* CONFIG_UTF8 */
 	}
 #ifdef CONFIG_UTF8
-	else if (term->utf8)
+	else if (term->utf8_cp)
 		fix_dwchar_around_box(term, &dlg_data->box, 0, 0, 0);
 #endif /* CONFIG_UTF8 */
 }
--- a/src/bfu/group.c
+++ b/src/bfu/group.c
@ -40,7 +40,7 @@ dlg_format_group(struct terminal *term,
 		int label_padding;

 #ifdef CONFIG_UTF8
-		if (term->utf8) {
+		if (term->utf8_cp) {
 			if (text && *text)
 				label_length = utf8_ptr2cells(text, NULL);
 			else
@ -55,7 +55,7 @@ dlg_format_group(struct terminal *term,
 			width = CHECKBOX_LEN;
 		} else if (widget_is_textfield(widget_data)) {
 #ifdef CONFIG_UTF8
-			if (term->utf8) {
+			if (term->utf8_cp) {
 				width = utf8_ptr2cells(widget_data->widget->data,
 						       NULL);
 			} else
@ -82,7 +82,7 @@ dlg_format_group(struct terminal *term,
 				/* Draw text at right of checkbox. */
 				if (label_length) {
 #ifdef CONFIG_UTF8
-					if (term->utf8) {
+					if (term->utf8_cp) {
 						int lb = utf8_cells2bytes(
 								text,
 								label_length,
@ -108,7 +108,7 @@ dlg_format_group(struct terminal *term,
 				/* Draw label at left of widget. */
 				if (label_length) {
 #ifdef CONFIG_UTF8
-					if (term->utf8) {
+					if (term->utf8_cp) {
 						int lb = utf8_cells2bytes(
 								text,
 								label_length,
@ -149,7 +149,7 @@ group_layouter(struct dialog_data *dlg_data)
 	int n = dlg_data->number_of_widgets - 2;

 #ifdef CONFIG_UTF8
-	if (term->utf8)
+	if (term->utf8_cp)
 		rw = int_min(w, utf8_ptr2cells(dlg_data->dlg->title, NULL));
 	else
 #endif /* CONFIG_UTF8 */
--- a/src/bfu/inpfield.c
+++ b/src/bfu/inpfield.c
@ -271,7 +271,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data,
 #endif /* CONFIG_UTF8 */

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		unsigned char *t = widget_data->cdata;
 		int p = widget_data->info.field.cpos;

@ -298,9 +298,9 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data,
 		int len, w;

 #ifdef CONFIG_UTF8
-		if (term->utf8 && !hide)
+		if (term->utf8_cp && !hide)
 			len = utf8_ptr2cells(text, NULL);
-		else if (term->utf8)
+		else if (term->utf8_cp)
 			len = utf8_ptr2chars(text, NULL);
 		else
 #endif /* CONFIG_UTF8 */
@ -309,7 +309,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data,

 		if (!hide) {
 #ifdef CONFIG_UTF8
-			if (term->utf8)
+			if (term->utf8_cp)
 				w = utf8_cells2bytes(text, w, NULL);
 #endif /* CONFIG_UTF8 */
 			draw_text(term, widget_data->box.x, widget_data->box.y,
@ -328,7 +328,7 @@ display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data,
 		int x;

 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			x = widget_data->box.x + len - left;
 		else
 #endif /* CONFIG_UTF8 */
@ -474,7 +474,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data)
 		case ACT_EDIT_RIGHT:
 			if (widget_data->info.field.cpos < strlen(widget_data->cdata)) {
 #ifdef CONFIG_UTF8
-				if (term->utf8) {
+				if (term->utf8_cp) {
 					unsigned char *next = widget_data->cdata + widget_data->info.field.cpos;
 					unsigned char *end = strchr(next, '\0');

@ -492,7 +492,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data)
 			if (widget_data->info.field.cpos > 0)
 				widget_data->info.field.cpos--;
 #ifdef CONFIG_UTF8
-			if (widget_data->info.field.cpos && term->utf8) {
+			if (widget_data->info.field.cpos && term->utf8_cp) {
 				unsigned char *t = widget_data->cdata;
 				unsigned char *t2 = t;
 				int p = widget_data->info.field.cpos;
@ -517,7 +517,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data)

 		case ACT_EDIT_BACKSPACE:
 #ifdef CONFIG_UTF8
-			if (widget_data->info.field.cpos && term->utf8) {
+			if (widget_data->info.field.cpos && term->utf8_cp) {
 				/* XXX: stolen from src/viewer/text/form.c */
 				/* FIXME: This isn't nice. We remove last byte
 				 *        from UTF-8 character to detect
@ -559,7 +559,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data)
 				if (widget_data->info.field.cpos >= cdata_len) goto display_field;

 #ifdef CONFIG_UTF8
-				if (term->utf8) {
+				if (term->utf8_cp) {
 					unsigned char *end = widget_data->cdata + cdata_len;
 					unsigned char *text = widget_data->cdata + widget_data->info.field.cpos;
 					unsigned char *old = text;
@ -688,22 +688,11 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data)
 				const unsigned char *ins;
 				int inslen;

-				if (term->utf8) {
-					/* get_kbd_key(ev) is in UCS-4,
-					 * and @text is in UTF-8.  */
-					ins = encode_utf8(get_kbd_key(ev));
-					/* get_kbd_key(ev) cannot be L'\0'
-					 * because @check_kbd_textinput_key
-					 * would have rejected it.  So it
-					 * is OK to use @strlen below.  */
-				} else {
-					/* get_kbd_key(ev) is UCS-4, and @text
-					 * is in the terminal's charset.  */
-					int cp = get_opt_codepage_tree(term->spec,
-								       "charset");
-
-					ins = u2cp_no_nbsp(get_kbd_key(ev), cp);
-				}
+				/* get_kbd_key(ev) is UCS-4, and @text
+				 * is in the terminal's charset.  */
+				ins = u2cp_no_nbsp(get_kbd_key(ev),
+						   get_opt_codepage_tree(term->spec,
+									 "charset"));
 				inslen = strlen(ins);
 #endif /* CONFIG_UTF8 */

--- a/src/bfu/listbox.c
+++ b/src/bfu/listbox.c
@ -463,7 +463,7 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset)
 		len = strlen(text);
 		int_upper_bound(&len, int_max(0, data->widget_data->box.width - depth * 5));
 #ifdef CONFIG_UTF8
-		if (data->term->utf8)
+		if (data->term->utf8_cp)
 			len_bytes = utf8_cells2bytes(text, len, NULL);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/bfu/menu.c
+++ b/src/bfu/menu.c
@ -201,7 +201,7 @@ get_menuitem_text_width(struct terminal *term, struct menu_item *mi)
 	if (!text[0]) return 0;

 #ifdef CONFIG_UTF8
-	if (term->utf8)
+	if (term->utf8_cp)
 		return L_TEXT_SPACE + utf8_ptr2cells(text, NULL)
 		       - !!mi->hotkey_pos + R_TEXT_SPACE;
 	else
@ -383,7 +383,7 @@ draw_menu_left_text(struct terminal *term, unsigned char *text, int len,
 	if (!len) return;

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		max_len = utf8_cells2bytes(text, w, NULL);
 		if (max_len <= 0)
 			return;
@ -431,7 +431,7 @@ draw_menu_left_text_hk(struct terminal *term, unsigned char *text,
 	}

 #ifdef CONFIG_UTF8
-	if (term->utf8) goto utf8;
+	if (term->utf8_cp) goto utf8;
 #endif /* CONFIG_UTF8 */

 	for (x = 0; x - !!hk_state < w && (c = text[x]); x++) {
@ -561,12 +561,12 @@ display_menu(struct terminal *term, struct menu *menu)
 		draw_shadow(term, &menu->box,
 			    get_bfu_color(term, "dialog.shadow"), 2, 1);
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			fix_dwchar_around_box(term, &box, 1, 2, 1);
 #endif /* CONFIG_UTF8 */
 	}
 #ifdef CONFIG_UTF8
-	else if (term->utf8)
+	else if (term->utf8_cp)
 		fix_dwchar_around_box(term, &box, 1, 0, 0);
 #endif /* CONFIG_UTF8 */

@ -1115,7 +1115,7 @@ display_mainmenu(struct terminal *term, struct menu *menu)

 		textlen = strlen(text) - !!l;
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			screencnt = utf8_ptr2cells(text, NULL) - !!l;
 		else
 #endif /* CONFIG_UTF8 */
@ -1125,7 +1125,7 @@ display_mainmenu(struct terminal *term, struct menu *menu)
 			color = selected_color;
 			box.x = p;
 #ifdef CONFIG_UTF8
-			if (term->utf8)
+			if (term->utf8_cp)
 				box.width = L_MAINTEXT_SPACE + L_TEXT_SPACE
 					+ screencnt
 					+ R_TEXT_SPACE + R_MAINTEXT_SPACE;
@ -1164,7 +1164,7 @@ display_mainmenu(struct terminal *term, struct menu *menu)
 	int_lower_bound(&menu->last, menu->first);
 	if (menu->last < menu->size - 1) {
 #ifdef CONFIG_UTF8
-		if (term->utf8) {
+		if (term->utf8_cp) {
 			struct screen_char *schar;

 			schar = get_char(term, term->width - R_MAINMENU_SPACE, 0);
--- a/src/bfu/text.c
+++ b/src/bfu/text.c
@ -231,7 +231,7 @@ dlg_format_text_do(struct terminal *term, unsigned char *text,
 		if (!*text) break;

 #ifdef CONFIG_UTF8
-		line_width = split_line(text, width, &cells, term->utf8);
+		line_width = split_line(text, width, &cells, term->utf8_cp);
 #else
 		line_width = split_line(text, width, &cells);
 #endif /* CONFIG_UTF8 */
@ -290,7 +290,7 @@ dlg_format_text(struct terminal *term, struct widget_data *widget_data,
 		 * split if we don't have to */
 #ifdef CONFIG_UTF8
 		if (widget_data->box.width != width
-		    && !split_lines(widget_data, width, term->utf8))
+		    && !split_lines(widget_data, width, term->utf8_cp))
 			return;
 #else
 		if (widget_data->box.width != width
--- a/src/bfu/widget.h
+++ b/src/bfu/widget.h
@ -52,11 +52,10 @@ struct widget {
 struct widget_data {
 	struct widget *widget;

-	/* For WIDGET_FIELD: If CONFIG_UTF8 is defined and UTF-8 I/O
-	 * is enabled for the terminal, then @cdata is in UTF-8;
-	 * otherwise, @cdata is in the charset of the terminal, and
-	 * the charset is assumed to be unibyte.  (Thus, if you choose
-	 * UTF-8 as the charset but disable UTF-8 I/O, you lose.)
+	/* For WIDGET_FIELD: @cdata is in the charset of the terminal.
+	 * (That charset can be UTF-8 only if CONFIG_UTF8 is defined,
+	 * and is assumed to be unibyte otherwise.)  The UTF-8 I/O
+	 * option has no effect here.
 	 *
 	 * For WIDGET_TEXT: @cdata is cast from/to an unsigned char **
 	 * that points to the first element of an array.  Each element
--- a/src/dialogs/download.c
+++ b/src/dialogs/download.c
@ -146,7 +146,7 @@ download_dialog_layouter(struct dialog_data *dlg_data)
 		return;
 	}
 #ifdef CONFIG_UTF8
-	if (term->utf8)
+	if (term->utf8_cp)
 		decode_uri(url);
 	else
 #endif /* CONFIG_UTF8 */
@ -303,7 +303,7 @@ get_file_download_text(struct listbox_item *item, struct terminal *term)
 	uristring = get_uri_string(file_download->uri, URI_PUBLIC);
 	if (uristring) {
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			decode_uri(uristring);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/dialogs/menu.c
+++ b/src/dialogs/menu.c
@ -586,7 +586,7 @@ query_file(struct session *ses, struct uri *uri, void *data,

 	/* Remove the %-ugliness for display */
 #ifdef CONFIG_UTF8
-	if (ses->tab->term->utf8)
+	if (ses->tab->term->utf8_cp)
 		decode_uri_string(&def);
 	else
 #endif /* CONFIG_UTF8 */
--- a/src/dialogs/status.c
+++ b/src/dialogs/status.c
@ -427,7 +427,7 @@ display_title_bar(struct session *ses, struct terminal *term)
 		int titlelen, titlewidth;

 #ifdef CONFIG_UTF8
-		if (term->utf8) {
+		if (term->utf8_cp) {
 			titlewidth = utf8_ptr2cells(document->title, NULL);
 			titlewidth = int_min(titlewidth, maxlen);

@ -452,7 +452,7 @@ display_title_bar(struct session *ses, struct terminal *term)
 	if (title.length) {
 		int x;
 #ifdef CONFIG_UTF8
-		if (term->utf8) {
+		if (term->utf8_cp) {
 			x = int_max(term->width - 1
 				    - utf8_ptr2cells(title.source,
 						     title.source
--- a/src/document/forms.h
+++ b/src/document/forms.h
@ -98,12 +98,11 @@ struct form_control {
 	int maxlength;
 	int nvalues;
 	unsigned char **values;
-	/* For FC_SELECT: If CONFIG_UTF8 is defined and UTF-8 I/O is
-	 * enabled for the terminal, then @labels is in UTF-8;
-	 * otherwise, @labels is in the charset of the terminal, and
-	 * the charset is assumed to be unibyte.  (Thus, if you choose
-	 * UTF-8 as the charset but disable UTF-8 I/O, you lose.)  The
-	 * charset of the document has no effect here.  */
+	/* For FC_SELECT: @labels are in the charset of the terminal.
+	 * (That charset can be UTF-8 only if CONFIG_UTF8 is defined,
+	 * and is assumed to be unibyte otherwise.)  The charset of
+	 * the document and the UTF-8 I/O option have no effect
+	 * here.  */
 	unsigned char **labels;
 	struct menu_item *menu;
 };
--- a/src/protocol/bittorrent/dialogs.c
+++ b/src/protocol/bittorrent/dialogs.c
@ -575,7 +575,7 @@ bittorrent_message_dialog(struct session *ses, void *data)
 	uristring = get_uri_string(message->uri, URI_PUBLIC);
 	if (uristring) {
 #ifdef CONFIG_UTF8
-		if (ses->tab->term->utf8)
+		if (ses->tab->term->utf8_cp)
 			decode_uri(uristring);
 		else
 #endif /* CONFIG_UTF8 */
@ -725,7 +725,7 @@ bittorrent_query_callback(void *data, enum connection_state state,
 		/* Let's make the filename pretty for display & save */
 		/* TODO: The filename can be the empty string here. See bug 396. */
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			decode_uri_string(&filename);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/session/download.c
+++ b/src/session/download.c
@ -1158,7 +1158,7 @@ do_type_query(struct type_query *type_query, unsigned char *ct, struct mime_hand
 		/* Let's make the filename pretty for display & save */
 		/* TODO: The filename can be the empty string here. See bug 396. */
 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			decode_uri_string(&filename);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/session/session.c
+++ b/src/session/session.c
@ -267,7 +267,7 @@ print_error_dialog(struct session *ses, enum connection_state state,
 	uristring = uri ? get_uri_string(uri, URI_PUBLIC) : NULL;
 	if (uristring) {
 #ifdef CONFIG_UTF8
-		if (ses->tab->term->utf8)
+		if (ses->tab->term->utf8_cp)
 			decode_uri(uristring);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/terminal/draw.c
+++ b/src/terminal/draw.c
@ -104,6 +104,9 @@ draw_char_color(struct terminal *term, int x, int y, struct color_pair *color)
 	set_screen_dirty(term->screen, y, y);
 }

+/* The data parameter here is like screen_char.data: UCS-4 if the
+ * charset of the terminal is UTF-8 (possible only if CONFIG_UTF8 is
+ * defined), and a byte otherwise.  */
 void
 #ifdef CONFIG_UTF8
 draw_char_data(struct terminal *term, int x, int y, unicode_val_T data)
@ -120,10 +123,10 @@ draw_char_data(struct terminal *term, int x, int y, unsigned char data)
 #ifdef CONFIG_UTF8
 #ifdef CONFIG_DEBUG
 	/* Detect attempt to draw double-width char on the last
-	 * column of terminal.  The unicode_to_cell(data) call
-	 * is in principle wrong if CONFIG_UTF8 is defined but
-	 * UTF-8 I/O is disabled, because @data is then a byte
-	 * in the charset of the terminal; but unicode_to_cell
+	 * column of terminal.  The unicode_to_cell(data) call is
+	 * in principle wrong if CONFIG_UTF8 is defined but the
+	 * charset of the terminal is not UTF-8, because @data
+	 * is then a byte in that charset; but unicode_to_cell
 	 * returns 1 for U+0000...U+00FF so it's not a problem.  */
 	if (unicode_to_cell(data) == 2 && x + 1 > term->width)
 		INTERNAL("Attempt to draw double-width glyph on last column!");
@ -152,7 +155,7 @@ draw_line(struct terminal *term, int x, int y, int l, struct screen_char *line)
 	if (size == 0) return;

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		struct screen_char *sc;

 		if (line[0].data == UCS_NO_CHAR && x == 0) {
@ -272,7 +275,7 @@ fix_dwchar_around_box(struct terminal *term, struct box *box, int border,
 	struct screen_char *schar;
 	int height, x, y;

-	if (!term->utf8)
+	if (!term->utf8_cp)
 		return;

 	/* 1 */
@ -497,7 +500,7 @@ draw_text(struct terminal *term, int x, int y,
 	if_assert_failed return;

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		draw_text_utf8(term, x, y, text, length, attr, color);
 		return;
 	}
--- a/src/terminal/draw.h
+++ b/src/terminal/draw.h
@ -33,11 +33,11 @@ struct screen_char {
 	/* Contains either character value or frame data.
 	 * If @attr includes SCREEN_ATTR_FRAME, then @data is enum
 	 * border_char; otherwise, @data is a character value.
-	 * If CONFIG_UTF8 is defined, and UTF-8 I/O is enabled for the
-	 * terminal, then the character value is in UCS-4; otherwise,
-	 * it is in the charset of the terminal, and the charset is
-	 * assumed to be unibyte.  (Thus, if you choose UTF-8 as the
-	 * charset but disable UTF-8 I/O, you lose.)  */
+	 * If the charset of the terminal is UTF-8 (which is possible
+	 * only if CONFIG_UTF8 is defined), then the character value
+	 * is in UCS-4; otherwise, the charset is assumed to be
+	 * unibyte, and the character value is a byte in that
+	 * charset.  */
 #ifdef CONFIG_UTF8
 	unicode_val_T data;
 #else
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@ -187,7 +187,7 @@ check_terminal_name(struct terminal *term, struct terminal_info *info)
 	/* Force UTF-8 I/O if the UTF-8 charset is selected.  Various
 	 * places assume that the terminal's charset is unibyte if
 	 * UTF-8 I/O is disabled.  (bug 827) */
-	term->utf8 = term->utf8_cp
+	term->utf8_io = term->utf8_cp
 		|| get_opt_bool_tree(term->spec, "utf_8_io");
 #endif /* CONFIG_UTF8 */
 }
@ -304,13 +304,13 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
 #ifdef CONFIG_UTF8
 		/* struct term_event_keyboard carries UCS-4.
 		 * - If the "utf_8_io" option is true or the "charset"
-		 *   option refers to UTF-8, then term->utf8 is true,
+		 *   option refers to UTF-8, then term->utf8_io is true,
 		 *   and handle_interlink_event() converts from UTF-8
 		 *   to UCS-4.
 		 * - Otherwise, handle_interlink_event() converts from
 		 *   the codepage specified with the "charset" option
 		 *   to UCS-4.  */
-		utf8_io = term->utf8;
+		utf8_io = term->utf8_io;
 #else
 		/* struct term_event_keyboard carries bytes in the
 		 * charset of the terminal.
--- a/src/terminal/screen.c
+++ b/src/terminal/screen.c
@ -326,7 +326,15 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec)
 			driver->opt.charsets[1] = get_cp_index("koi8-r");

 		} else {
-			driver->opt.charsets[1] = driver->opt.charsets[0];
+#ifdef CONFIG_UTF8
+			/* Don't let driver->opt.charsets[1] become
+			 * UTF-8, because it is passed to cp2u(),
+			 * which supports only unibyte characters.  */
+			if (driver->opt.utf8_cp)
+				driver->opt.charsets[1] = get_cp_index("US-ASCII");
+			else
+#endif	/* CONFIG_UTF8 */
+				driver->opt.charsets[1] = driver->opt.charsets[0];
 		}

 	} else { /* !utf8_io */
@ -385,7 +393,7 @@ add_screen_driver(enum term_mode_type type, struct terminal *term, int env_len)
 	term->spec->change_hook = screen_driver_change_hook;

 #ifdef CONFIG_UTF8
-	term->utf8 = use_utf8_io(driver);
+	term->utf8_io = use_utf8_io(driver);
 	term->utf8_cp = driver->opt.utf8_cp;
 #endif /* CONFIG_UTF8 */

@ -408,7 +416,7 @@ get_screen_driver(struct terminal *term)
 		move_to_top_of_list(active_screen_drivers, driver);

 #ifdef CONFIG_UTF8
-		term->utf8 = use_utf8_io(driver);
+		term->utf8_io = use_utf8_io(driver);
 		term->utf8_cp = driver->opt.utf8_cp;
 #endif /* CONFIG_UTF8 */
 		return driver;
@ -563,47 +571,42 @@ add_char_data(struct string *screen, struct screen_driver *driver,
 	      unsigned char data, unsigned char border)
 #endif /* !CONFIG_UTF8 */
 {
-	/* CONFIG_UTF8  use_utf8_io  border  data              add_to_string
-	 * -----------  -----------  ------  ----------------  ----------------
-	 * not defined  0            0       terminal unibyte  terminal unibyte
-	 * not defined  0            1       enum border_char  border unibyte
-	 * not defined  1            0       terminal unibyte  UTF-8
-	 * not defined  1            1       enum border_char  UTF-8
-	 * defined      0            0       terminal unibyte  terminal unibyte
-	 * defined      0            1       enum border_char  border unibyte
-	 * defined      1            0       UTF-32            UTF-8
-	 * defined      1            1       enum border_char  UTF-8
+	/* charset  use_utf8_io  border  data              add_to_string
+	 * -------  -----------  ------  ----------------  ----------------
+	 * unibyte  0            0       terminal unibyte  terminal unibyte
+	 * unibyte  0            1       enum border_char  border unibyte
+	 * unibyte  1            0       terminal unibyte  UTF-8
+	 * unibyte  1            1       enum border_char  UTF-8
+	 * UTF-8    1            0       UTF-32 (*)        UTF-8
+	 * UTF-8    1            1       enum border_char  UTF-8
 	 *
-	 * For "UTF-32" above, data can also be UCS_NO_CHAR,
+	 * (*) For "UTF-32" above, data can also be UCS_NO_CHAR,
 	 * in which case this function must not alter *screen.
 	 */

 	if (border && driver->opt.frame && data >= 176 && data < 224)
 		data = driver->opt.frame[data - 176];

-	if (use_utf8_io(driver)) {
 #ifdef CONFIG_UTF8
+	if (driver->opt.utf8_cp) {
 		if (border) {
-			int charset = driver->opt.charsets[!!border];
-
-			add_to_string(screen, cp2utf8(charset,
-						      (unsigned char) data));
-		} else {
-			if (data == UCS_NO_CHAR)
-				return;
-
-			if (!isscreensafe_ucs(data))
-				data = UCS_SPACE;
-			add_to_string(screen, encode_utf8(data));
+			data = cp2u(driver->opt.charsets[1],
+				    (unsigned char) data);
 		}
-#else  /* !CONFIG_UTF8 */
+		if (data == UCS_NO_CHAR)
+			return;
+		if (!isscreensafe_ucs(data))
+			data = UCS_SPACE;
+		add_to_string(screen, encode_utf8(data));
+	} else
+#endif /* CONFIG_UTF8 */
+	if (use_utf8_io(driver)) {
 		int charset = driver->opt.charsets[!!border];

 		if (border || isscreensafe(data))
 			add_to_string(screen, cp2utf8(charset, data));
 		else /* UCS_SPACE <= 0x7F and so fits in one UTF-8 byte */
 			add_char_to_string(screen, UCS_SPACE);
-#endif /* !CONFIG_UTF8 */
 	} else {
 		if (border || isscreensafe(data))
 			add_char_to_string(screen, (unsigned char)data);
@ -623,7 +626,7 @@ add_char16(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    border != state->border && driver->opt.frame_seqs
 	   ) {
@ -633,7 +636,7 @@ add_char16(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    underline != state->underline && driver->opt.underline
 	   ) {
@ -643,7 +646,7 @@ add_char16(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    bold != state->bold
 	   ) {
@ -658,7 +661,7 @@ add_char16(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    !compare_color_16(ch->color, state->color)
 	   ) {
@ -773,7 +776,7 @@ add_char256(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    attr_delta
 	   ) {
@ -801,7 +804,7 @@ add_char256(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    !compare_color_256(ch->color, state->color)
 	   ) {
@ -888,7 +891,7 @@ add_char_true(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    attr_delta
 	   ) {
@ -916,7 +919,7 @@ add_char_true(struct string *screen, struct screen_driver *driver,

 	if (
 #ifdef CONFIG_UTF8
-	    (!use_utf8_io(driver) || ch->data != UCS_NO_CHAR) &&
+	    !(driver->opt.utf8_cp && ch->data == UCS_NO_CHAR) &&
 #endif /* CONFIG_UTF8 */
 	    !compare_color_true(ch->color, state->color)
 	   ) {
--- a/src/terminal/terminal.h
+++ b/src/terminal/terminal.h
@ -132,7 +132,7 @@ struct terminal {

 	/* Indicates whether UTF-8 I/O is used.  Forced on if the
 	 * UTF-8 charset is selected.  (bug 827) */
-	unsigned int utf8:1;
+	unsigned int utf8_io:1;
 #endif /* CONFIG_UTF8 */

 	/* The current tab number. */
--- a/src/viewer/text/form.c
+++ b/src/viewer/text/form.c
@ -161,12 +161,7 @@ init_form_state(struct document_view *doc_view,

 	doc_cp = doc_view->document->cp;
 	term = doc_view->session->tab->term;
-#ifdef CONFIG_UTF8
-	if (term->utf8)
-		viewer_cp = get_cp_index("UTF-8");
-	else
-#endif
-		viewer_cp = get_opt_codepage_tree(term->spec, "charset");
+	viewer_cp = get_opt_codepage_tree(term->spec, "charset");

 	mem_free_set(&fs->value, NULL);

@ -384,7 +379,7 @@ draw_form_entry(struct terminal *term, struct document_view *doc_view,

 			x = link->points[0].x + dx;
 #ifdef CONFIG_UTF8
-			if (term->utf8) goto utf8;
+			if (term->utf8_cp) goto utf8;
 #endif /* CONFIG_UTF8 */
 			int_bounds(&fs->vpos, fs->state - fc->size + 1, fs->state);
 			len = strlen(fs->value) - fs->vpos;
@ -540,7 +535,7 @@ drew_char:
 				/* XXX: when can this happen? --pasky */
 				s = "";
 #ifdef CONFIG_UTF8
-			if (term->utf8) goto utf8_select;
+			if (term->utf8_cp) goto utf8_select;
 #endif /* CONFIG_UTF8 */
 			len = s ? strlen(s) : 0;
 			for (i = 0; i < link->npoints; i++) {
@ -1379,7 +1374,7 @@ field_op(struct session *ses, struct document_view *doc_view,
 	enum frame_event_status status = FRAME_EVENT_REFRESH;
 #ifdef CONFIG_UTF8
 	const unsigned char *ctext;
-	int utf8 = ses->tab->term->utf8;
+	int utf8 = ses->tab->term->utf8_cp;
 #endif /* CONFIG_UTF8 */

 	assert(ses && doc_view && link && ev);
@ -1766,18 +1761,10 @@ field_op(struct session *ses, struct document_view *doc_view,
 			}

 #ifdef CONFIG_UTF8
-			if (ses->tab->term->utf8) {
-				/* fs->value is in UTF-8 regardless of
-				 * the charset of the terminal.  */
-				ctext = encode_utf8(get_kbd_key(ev));
-			} else {
-				/* fs->value is in the charset of the
-				 * terminal.  */
-				int cp = get_opt_codepage_tree(ses->tab->term->spec,
-							       "charset");
-
-				ctext = u2cp_no_nbsp(get_kbd_key(ev), cp);
-			}
+			/* fs->value is in the charset of the terminal.  */
+			ctext = u2cp_no_nbsp(get_kbd_key(ev),
+					     get_opt_codepage_tree(ses->tab->term->spec,
+								   "charset"));
 			length = strlen(ctext);

 			if (strlen(fs->value) + length > fc->maxlength
--- a/src/viewer/text/form.h
+++ b/src/viewer/text/form.h
@ -44,12 +44,11 @@ struct form_state {
 	 * string always requires calling realloc().  The string is
 	 * not normally allowed to grow past @form_control.maxlength
 	 * bytes (not counting the null), but there may be ways to get
-	 * longer strings.  If CONFIG_UTF8 is defined and UTF-8 I/O is
-	 * enabled for the terminal, then @value is in UTF-8;
-	 * otherwise, @value is in the charset of the terminal, and
-	 * the charset is assumed to be unibyte.  (Thus, if you choose
-	 * UTF-8 as the charset but disable UTF-8 I/O, you lose.)
-	 * The charset of the document has no effect here.  */
+	 * longer strings.  The string is in the charset of the
+	 * terminal (which can be UTF-8 only if CONFIG_UTF8 is
+	 * defined, and is assumed to be unibyte otherwise).  The
+	 * charset of the document and the UTF-8 I/O option have no
+	 * effect here.  */
 	unsigned char *value;
 	/* For FC_TEXT, FC_PASSWORD, and FC_FILE, @state is the byte
 	 * position of the insertion point in @value.
--- a/src/viewer/text/link.c
+++ b/src/viewer/text/link.c
@ -117,7 +117,7 @@ get_link_cursor_offset(struct document_view *doc_view, struct link *link)
 #ifdef CONFIG_UTF8
 	/* The encoding of form fields depends on the terminal,
 	 * rather than on the document.  */
-	int utf8 = doc_view->session->tab->term->utf8;
+	int utf8 = doc_view->session->tab->term->utf8_cp;
 #endif /* CONFIG_UTF8 */

 	switch (link->type) {
@ -1465,7 +1465,7 @@ get_current_link_info(struct session *ses, struct document_view *doc_view)
 		}

 #ifdef CONFIG_UTF8
-		if (term->utf8)
+		if (term->utf8_cp)
 			decode_uri_string(&str);
 		else
 #endif /* CONFIG_UTF8 */
--- a/src/viewer/text/textarea.c
+++ b/src/viewer/text/textarea.c
@ -413,7 +413,7 @@ draw_textarea(struct terminal *term, struct form_state *fs,
 	if_assert_failed return;

 #ifdef CONFIG_UTF8
-	if (term->utf8) {
+	if (term->utf8_cp) {
 		draw_textarea_utf8(term, fs, doc_view, link);
 		return;
 	}