From b6dfdf86a6f06db8e8fb2da909777ab04c5a870e Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Mon, 29 Dec 2008 03:09:53 +0200 Subject: [PATCH] Bug 885: Proper charset support in xterm window title When ELinks runs in an X11 terminal emulator (e.g. xterm), or in GNU Screen, it tries to update the title of the window to match the title of the current document. To do this, ELinks sends an "OSC 0 ; Pt BEL" sequence to the terminal. Unfortunately, xterm expects the Pt string to be in the ISO-8859-1 charset, making it impossible to display e.g. Cyrillic characters. In xterm patch #210 (2006-03-12) however, there is a menu item and a resource that can make xterm take the Pt string in UTF-8 instead, allowing characters from all around the world. The downside is that ELinks apparently cannot ask xterm whether the setting is on or off; so add a terminal._template_.latin1_title option to ELinks and let the user edit that instead. Complete list of changes: - Add the terminal._template_.latin1_title option. But do not add that to the terminal options window because it's already rather crowded there. - In set_window_title(), take a new codepage argument. Use it to decode the title into Unicode characters, and remove only actual control characters. For example, CP437 has graphical characters in the 0x80...0x9F range, so don't remove those, even though ISO-8859-1 has control characters in the same range. Likewise, don't misinterpret single bytes of UTF-8 characters as control characters. - In set_window_title(), do not truncate the title to the width of the window. The font is likely to be different and proportional anyway. But do truncate before 1024 bytes, an xterm limit. - In struct itrm, add a title_codepage member to remember which charset the master said it was going to use in the terminal window title. 
Initialize title_codepage in handle_trm(), update it in dispatch_special() if the master sends the new request TERM_FN_TITLE_CODEPAGE, and use it in most set_window_title() calls; but not in the one that sets $TERM as the title, because that string was not received from the master and should consist of ASCII characters only. - In set_terminal_title(), convert the caller-provided title to ISO-8859-1 or UTF-8 if appropriate, and report the codepage to the slave with the new TERM_FN_TITLE_CODEPAGE request. The conversion can run out of memory, so return a success/error flag, rather than void. In display_window_title(), check this result and don't update caches on error. - Add a NEWS entry for all of this. --- NEWS | 4 +++ src/config/options.inc | 13 ++++++++ src/dialogs/status.c | 10 +++--- src/osdep/os2/os2.c | 2 +- src/osdep/osdep.c | 72 ++++++++++++++++++++--------------------- src/osdep/osdep.h | 2 +- src/terminal/itrm.h | 1 + src/terminal/kbd.c | 29 +++++++++++++++-- src/terminal/terminal.c | 38 ++++++++++++++++++++-- src/terminal/terminal.h | 10 +++--- 10 files changed, 127 insertions(+), 54 deletions(-) diff --git a/NEWS b/NEWS index 8b4656f4..24b78b63 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,10 @@ includes the changes listed under ``ELinks 0.11.5.GIT'' below. Previously, they could turn into spaces or disappear entirely. * Perl scripts can use modules that dynamically load C libraries, like XML::LibXML::SAX does. +* bug 885: Convert xterm titles to ISO-8859-1 by default, but add an + option to disable this. When removing control characters from a + title, note the charset. Don't truncate titles to the width of the + terminal. * enhancement: Updated ISO 8859-7, ISO 8859-16, KOI8-R, and MacRoman. 
ELinks 0.12pre2: diff --git a/src/config/options.inc b/src/config/options.inc index ad96f26e..0c508ae4 100644 --- a/src/config/options.inc +++ b/src/config/options.inc @@ -847,6 +847,19 @@ static struct option_info config_options_info[] = { "3 is KOI-8\n" "4 is FreeBSD")), + INIT_OPT_BOOL("terminal._template_", N_("Always encode xterm title in ISO-8859-1"), + "latin1_title", 0, 1, + N_("When updating the window title of xterm or a similar " + "terminal emulator, encode the title in ISO-8859-1 (Latin-1), " + "rather than in the charset used for other text in the window. " + "Cyrillic and other characters get replaced with Latin ones. " + "Xterm requires this unless you explicitly enable UTF-8 " + "titles in it.\n" + "\n" + "If this option does not take effect immediately, try switching " + "to a different page so that ELinks notices it needs to update " + "the title.")), + INIT_OPT_BOOL("terminal._template_", N_("Switch fonts for line drawing"), "m11_hack", 0, 0, N_("Switch fonts when drawing lines, enabling both local characters\n" diff --git a/src/dialogs/status.c b/src/dialogs/status.c index 908ad0b2..b41b0955 100644 --- a/src/dialogs/status.c +++ b/src/dialogs/status.c @@ -489,12 +489,12 @@ display_window_title(struct session *ses, struct terminal *term) if (!title) return; titlelen = strlen(title); - if (last_ses != ses - || !status->last_title - || strlen(status->last_title) != titlelen - || memcmp(status->last_title, title, titlelen)) { + if ((last_ses != ses + || !status->last_title + || strlen(status->last_title) != titlelen + || memcmp(status->last_title, title, titlelen)) + && set_terminal_title(term, title) >= 0) { mem_free_set(&status->last_title, title); - set_terminal_title(term, title); last_ses = ses; } else { mem_free(title); diff --git a/src/osdep/os2/os2.c b/src/osdep/os2/os2.c index 20d6d9f4..16875c14 100644 --- a/src/osdep/os2/os2.c +++ b/src/osdep/os2/os2.c @@ -303,7 +303,7 @@ get_window_title(void) } void -set_window_title(unsigned char 
*title) +set_window_title(unsigned char *title, int codepage) { #ifndef DEBUG_OS2 static PTIB tib; diff --git a/src/osdep/osdep.c b/src/osdep/osdep.c index 74eab68a..e26463e3 100644 --- a/src/osdep/osdep.c +++ b/src/osdep/osdep.c @@ -407,73 +407,71 @@ set_clipboard_text(unsigned char *data) /* Set xterm-like term window's title. */ void -set_window_title(unsigned char *title) +set_window_title(unsigned char *title, int codepage) { - unsigned char *s; - int xsize, ysize; - int j = 0; + struct string filtered; #ifndef HAVE_SYS_CYGWIN_H /* Check if we're in a xterm-like terminal. */ if (!is_xterm() && !is_gnuscreen()) return; #endif - /* Retrieve terminal dimensions. */ - get_terminal_size(0, &xsize, &ysize); + if (!init_string(&filtered)) return; - /* Check if terminal width is reasonnable. */ - if (xsize < 1 || xsize > 1024) return; - - /* Allocate space for title + 3 ending points + null char. */ - s = mem_alloc(xsize + 3 + 1); - if (!s) return; - - /* Copy title to s if different from NULL */ + /* Copy title to filtered if different from NULL */ if (title) { - int i; + unsigned char *scan = title; + unsigned char *end = title + strlen(title); - /* We limit title length to terminal width and ignore control - * chars if any. Note that in most cases window decoration - * reduces printable width, so it's just a precaution. */ + /* Remove control characters, so that they cannot + * interfere with the command we send to the terminal. + * However, do not attempt to limit the title length + * to terminal width, because the title is usually + * drawn in a different font anyway. */ /* Note that this is the right place where to do it, since * potential alternative set_window_title() routines might * want to take different precautions. */ - for (i = 0; title[i] && i < xsize; i++) { - /* 0x80 .. 0x9f are ISO-8859-* control characters. - * In some other encodings they could be used for - * legitimate characters, though (ie. in Kamenicky). 
- * We should therefore maybe check for these only - * if the terminal is running in an ISO- encoding. */ - if (iscntrl(title[i]) || (title[i] & 0x7f) < 0x20 - || title[i] == 0x7f) + for (;;) { + unsigned char *charbegin = scan; + unicode_val_T unicode + = cp_to_unicode(codepage, &scan, end); + int charlen = scan - charbegin; + + if (unicode == UCS_NO_CHAR) + break; + + /* This need not recognize all Unicode control + * characters. Only those that can make the + * terminal misparse the command. */ + if (unicode < 0x20 + || (unicode >= 0x7F && unicode < 0xA0)) continue; - s[j++] = title[i]; - } + /* xterm entirely rejects 1024-byte or longer + * titles. */ + if (filtered.length + charlen >= 1024 - 3) { + add_to_string(&filtered, "..."); + break; + } - /* If title is truncated, add "..." */ - if (i == xsize) { - s[j++] = '.'; - s[j++] = '.'; - s[j++] = '.'; + add_bytes_to_string(&filtered, charbegin, charlen); } } - s[j] = '\0'; /* Send terminal escape sequence + title string */ - printf("\033]0;%s\a", s); + printf("\033]0;%s\a", filtered.source); #if 0 /* Miciah don't like this so it is disabled because it changes the * default window name. 
--jonas */ /* Set the GNU screen window name */ if (is_gnuscreen()) - printf("\033k%s\033\134", s); + printf("\033k%s\033\134", filtered.source); #endif fflush(stdout); - mem_free(s); + done_string(&filtered); } #ifdef HAVE_X11 diff --git a/src/osdep/osdep.h b/src/osdep/osdep.h index 721916fd..15cefc3b 100644 --- a/src/osdep/osdep.h +++ b/src/osdep/osdep.h @@ -38,7 +38,7 @@ void resume_mouse(void *); int start_thread(void (*)(void *, int), void *, int); unsigned char *get_clipboard_text(void); void set_clipboard_text(unsigned char *); -void set_window_title(unsigned char *); +void set_window_title(unsigned char *, int codepage); unsigned char *get_window_title(void); void block_stdin(void); void unblock_stdin(void); diff --git a/src/terminal/itrm.h b/src/terminal/itrm.h index 30e6375e..63163b44 100644 --- a/src/terminal/itrm.h +++ b/src/terminal/itrm.h @@ -101,6 +101,7 @@ struct itrm { unsigned char *orig_title; /**< For restoring window title */ int verase; /**< Byte to map to KBD_BS, or -1 */ + int title_codepage; /**< Codepage of terminal title */ unsigned int blocked:1; /**< Whether it was blocked */ unsigned int altscreen:1; /**< Whether to use alternate screen */ unsigned int touched_title:1; /**< Whether the term title was changed */ diff --git a/src/terminal/kbd.c b/src/terminal/kbd.c index d9ffde75..af37ebd1 100644 --- a/src/terminal/kbd.c +++ b/src/terminal/kbd.c @@ -324,6 +324,11 @@ handle_trm(int std_in, int std_out, int sock_in, int sock_out, int ctl_in, itrm->timer = TIMER_ID_UNDEF; itrm->remote = !!remote; + /* If the master does not tell which charset it's using in + * this terminal, assume it's some ISO 8859. Because that's + * what older versions of ELinks did. */ + itrm->title_codepage = get_cp_index("ISO-8859-1"); + /* FIXME: Combination altscreen + xwin does not work as it should, * mouse clicks are reportedly partially ignored. 
*/ if (info.system_env & (ENV_SCREEN | ENV_XWIN)) @@ -415,7 +420,7 @@ free_itrm(struct itrm *itrm) if (!itrm->remote) { if (itrm->orig_title && *itrm->orig_title) { - set_window_title(itrm->orig_title); + set_window_title(itrm->orig_title, itrm->title_codepage); } else if (itrm->touched_title) { /* Set the window title to the value of $TERM if X11 @@ -425,7 +430,8 @@ free_itrm(struct itrm *itrm) get_terminal_name(title); if (*title) - set_window_title(title); + set_window_title(title, + get_cp_index("US-ASCII")); } @@ -498,7 +504,12 @@ dispatch_special(unsigned char *text) ditrm->orig_title = get_window_title(); ditrm->touched_title = 1; } - set_window_title(text + 1); + /* TODO: Is it really possible to get here with + * ditrm == NULL, and which charset would then + * be most appropriate? */ + set_window_title(text + 1, + ditrm ? ditrm->title_codepage + : get_cp_index("US-ASCII")); break; case TERM_FN_RESIZE: if (ditrm && ditrm->remote) @@ -506,6 +517,18 @@ dispatch_special(unsigned char *text) resize_terminal_from_str(text + 1); break; + case TERM_FN_TITLE_CODEPAGE: + if (ditrm) { + int cp = get_cp_index(text + 1); + + /* If the master sends the name of an + * unrecognized charset, assume only + * that it's ASCII compatible. */ + if (cp == -1) + cp = get_cp_index("US-ASCII"); + ditrm->title_codepage = cp; + } + break; } } diff --git a/src/terminal/terminal.c b/src/terminal/terminal.c index 1cfa08fd..034cb497 100644 --- a/src/terminal/terminal.c +++ b/src/terminal/terminal.c @@ -372,12 +372,44 @@ do_terminal_function(struct terminal *term, unsigned char code, fmem_free(x_data); } -void +/** @return negative on error; zero or positive on success. */ +int set_terminal_title(struct terminal *term, unsigned char *title) { - if (term->title && !strcmp(title, term->title)) return; + int from_cp; + int to_cp; + unsigned char *converted = NULL; + + if (term->title && !strcmp(title, term->title)) return 0; + + /* In which codepage was the title parameter given? 
*/ + from_cp = get_terminal_codepage(term); + + /* In which codepage does the terminal want the title? */ + if (get_opt_bool_tree(term->spec, "latin1_title")) + to_cp = get_cp_index("ISO-8859-1"); + else if (get_opt_bool_tree(term->spec, "utf_8_io")) + to_cp = get_cp_index("UTF-8"); + else + to_cp = from_cp; + + if (from_cp != to_cp) { + struct conv_table *convert_table; + + convert_table = get_translation_table(from_cp, to_cp); + if (!convert_table) return -1; + converted = convert_string(convert_table, title, strlen(title), + to_cp, CSM_NONE, NULL, NULL, NULL); + if (!converted) return -1; + } + mem_free_set(&term->title, stracpy(title)); - do_terminal_function(term, TERM_FN_TITLE, title); + do_terminal_function(term, TERM_FN_TITLE_CODEPAGE, + get_cp_mime_name(to_cp)); + do_terminal_function(term, TERM_FN_TITLE, + converted ? converted : title); + mem_free_if(converted); + return 0; } static int terminal_pipe[2]; diff --git a/src/terminal/terminal.h b/src/terminal/terminal.h index 1b50ca3b..c2c1d79f 100644 --- a/src/terminal/terminal.h +++ b/src/terminal/terminal.h @@ -185,12 +185,14 @@ void close_handle(void *); void assert_terminal_ptr_not_dangling(const struct terminal *); #endif -/** Operations that can be requested with do_terminal_function(). +/** Operations that can be requested with do_terminal_function() in + * the master and then executed with dispatch_special() in a slave. * The interlink protocol passes these values as one byte in a * null-terminated string, so zero cannot be used. */ enum { - TERM_FN_TITLE = 1, - TERM_FN_RESIZE = 2 + TERM_FN_TITLE = 1, + TERM_FN_RESIZE = 2, + TERM_FN_TITLE_CODEPAGE = 3 }; /** How to execute a program in a terminal. 
These values are used in @@ -211,7 +213,7 @@ enum term_exec { void exec_on_terminal(struct terminal *, unsigned char *, unsigned char *, enum term_exec); void exec_shell(struct terminal *term); -void set_terminal_title(struct terminal *, unsigned char *); +int set_terminal_title(struct terminal *, unsigned char *); void do_terminal_function(struct terminal *, unsigned char, unsigned char *); int check_terminal_pipes(void);