1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-06-15 23:35:34 +00:00

Bug 885: Proper charset support in xterm window title

When ELinks runs in an X11 terminal emulator (e.g. xterm), or in GNU
Screen, it tries to update the title of the window to match the title
of the current document.  To do this, ELinks sends an "OSC 0 ; Pt BEL"
sequence to the terminal.  Unfortunately, xterm expects the Pt string
to be in the ISO-8859-1 charset, making it impossible to display e.g.
Cyrillic characters.  In xterm patch #210 (2006-03-12) however, there
is a menu item and a resource that can make xterm take the Pt string
in UTF-8 instead, allowing characters from all around the world.
The downside is that ELinks apparently cannot ask xterm whether the
setting is on or off; so add a terminal._template_.latin1_title option
to ELinks and let the user edit that instead.

Complete list of changes:

- Add the terminal._template_.latin1_title option.  But do not add
  that to the terminal options window because it's already rather
  crowded there.

- In set_window_title(), take a new codepage argument.  Use it to
  decode the title into Unicode characters, and remove only actual
  control characters.  For example, CP437 has graphical characters in
  the 0x80...0x9F range, so don't remove those, even though ISO-8859-1
  has control characters in the same range.  Likewise, don't
  misinterpret single bytes of UTF-8 characters as control characters.

- In set_window_title(), do not truncate the title to the width of the
  window.  The font is likely to be different and proportional anyway.
  But do truncate before 1024 bytes, an xterm limit.

- In struct itrm, add a title_codepage member to remember which
  charset the master said it was going to use in the terminal window
  title.  Initialize title_codepage in handle_trm(), update it in
  dispatch_special() if the master sends the new request
  TERM_FN_TITLE_CODEPAGE, and use it in most set_window_title() calls;
  but not in the one that sets $TERM as the title, because that string
  was not received from the master and should consist of ASCII
  characters only.

- In set_terminal_title(), convert the caller-provided title to
  ISO-8859-1 or UTF-8 if appropriate, and report the codepage to the
  slave with the new TERM_FN_TITLE_CODEPAGE request.  The conversion
  can run out of memory, so return a success/error flag, rather than
  void.  In display_window_title(), check this result and don't update
  caches on error.

- Add a NEWS entry for all of this.
This commit is contained in:
Kalle Olavi Niemitalo 2008-12-29 03:09:53 +02:00 committed by Kalle Olavi Niemitalo
parent 8f4d7f9903
commit b6dfdf86a6
10 changed files with 127 additions and 54 deletions

4
NEWS
View File

@ -15,6 +15,10 @@ includes the changes listed under ``ELinks 0.11.5.GIT'' below.
Previously, they could turn into spaces or disappear entirely.
* Perl scripts can use modules that dynamically load C libraries, like
XML::LibXML::SAX does.
* bug 885: Convert xterm titles to ISO-8859-1 by default, but add an
option to disable this. When removing control characters from a
title, note the charset. Don't truncate titles to the width of the
terminal.
* enhancement: Updated ISO 8859-7, ISO 8859-16, KOI8-R, and MacRoman.
ELinks 0.12pre2:

View File

@ -847,6 +847,19 @@ static struct option_info config_options_info[] = {
"3 is KOI-8\n"
"4 is FreeBSD")),
INIT_OPT_BOOL("terminal._template_", N_("Always encode xterm title in ISO-8859-1"),
"latin1_title", 0, 1,
N_("When updating the window title of xterm or a similar "
"terminal emulator, encode the title in ISO-8859-1 (Latin-1), "
"rather than in the charset used for other text in the window. "
"Cyrillic and other characters get replaced with Latin ones. "
"Xterm requires this unless you explicitly enable UTF-8 "
"titles in it.\n"
"\n"
"If this option does not take effect immediately, try switching "
"to a different page so that ELinks notices it needs to update "
"the title.")),
INIT_OPT_BOOL("terminal._template_", N_("Switch fonts for line drawing"),
"m11_hack", 0, 0,
N_("Switch fonts when drawing lines, enabling both local characters\n"

View File

@ -489,12 +489,12 @@ display_window_title(struct session *ses, struct terminal *term)
if (!title) return;
titlelen = strlen(title);
if (last_ses != ses
|| !status->last_title
|| strlen(status->last_title) != titlelen
|| memcmp(status->last_title, title, titlelen)) {
if ((last_ses != ses
|| !status->last_title
|| strlen(status->last_title) != titlelen
|| memcmp(status->last_title, title, titlelen))
&& set_terminal_title(term, title) >= 0) {
mem_free_set(&status->last_title, title);
set_terminal_title(term, title);
last_ses = ses;
} else {
mem_free(title);

View File

@ -303,7 +303,7 @@ get_window_title(void)
}
void
set_window_title(unsigned char *title)
set_window_title(unsigned char *title, int codepage)
{
#ifndef DEBUG_OS2
static PTIB tib;

View File

@ -407,73 +407,71 @@ set_clipboard_text(unsigned char *data)
/* Set xterm-like term window's title. */
void
set_window_title(unsigned char *title)
set_window_title(unsigned char *title, int codepage)
{
unsigned char *s;
int xsize, ysize;
int j = 0;
struct string filtered;
#ifndef HAVE_SYS_CYGWIN_H
/* Check if we're in a xterm-like terminal. */
if (!is_xterm() && !is_gnuscreen()) return;
#endif
/* Retrieve terminal dimensions. */
get_terminal_size(0, &xsize, &ysize);
if (!init_string(&filtered)) return;
/* Check if terminal width is reasonable. */
if (xsize < 1 || xsize > 1024) return;
/* Allocate space for title + 3 ending points + null char. */
s = mem_alloc(xsize + 3 + 1);
if (!s) return;
/* Copy title to s if different from NULL */
/* Copy title to filtered if different from NULL */
if (title) {
int i;
unsigned char *scan = title;
unsigned char *end = title + strlen(title);
/* We limit title length to terminal width and ignore control
* chars if any. Note that in most cases window decoration
* reduces printable width, so it's just a precaution. */
/* Remove control characters, so that they cannot
* interfere with the command we send to the terminal.
* However, do not attempt to limit the title length
* to terminal width, because the title is usually
* drawn in a different font anyway. */
/* Note that this is the right place to do it, since
* potential alternative set_window_title() routines might
* want to take different precautions. */
for (i = 0; title[i] && i < xsize; i++) {
/* 0x80 .. 0x9f are ISO-8859-* control characters.
* In some other encodings they could be used for
* legitimate characters, though (ie. in Kamenicky).
* We should therefore maybe check for these only
* if the terminal is running in an ISO- encoding. */
if (iscntrl(title[i]) || (title[i] & 0x7f) < 0x20
|| title[i] == 0x7f)
for (;;) {
unsigned char *charbegin = scan;
unicode_val_T unicode
= cp_to_unicode(codepage, &scan, end);
int charlen = scan - charbegin;
if (unicode == UCS_NO_CHAR)
break;
/* This need not recognize all Unicode control
* characters. Only those that can make the
* terminal misparse the command. */
if (unicode < 0x20
|| (unicode >= 0x7F && unicode < 0xA0))
continue;
s[j++] = title[i];
}
/* xterm entirely rejects 1024-byte or longer
* titles. */
if (filtered.length + charlen >= 1024 - 3) {
add_to_string(&filtered, "...");
break;
}
/* If title is truncated, add "..." */
if (i == xsize) {
s[j++] = '.';
s[j++] = '.';
s[j++] = '.';
add_bytes_to_string(&filtered, charbegin, charlen);
}
}
s[j] = '\0';
/* Send terminal escape sequence + title string */
printf("\033]0;%s\a", s);
printf("\033]0;%s\a", filtered.source);
#if 0
/* Miciah doesn't like this, so it is disabled because it changes the
* default window name. --jonas */
/* Set the GNU screen window name */
if (is_gnuscreen())
printf("\033k%s\033\134", s);
printf("\033k%s\033\134", filtered.source);
#endif
fflush(stdout);
mem_free(s);
done_string(&filtered);
}
#ifdef HAVE_X11

View File

@ -38,7 +38,7 @@ void resume_mouse(void *);
int start_thread(void (*)(void *, int), void *, int);
unsigned char *get_clipboard_text(void);
void set_clipboard_text(unsigned char *);
void set_window_title(unsigned char *);
void set_window_title(unsigned char *, int codepage);
unsigned char *get_window_title(void);
void block_stdin(void);
void unblock_stdin(void);

View File

@ -101,6 +101,7 @@ struct itrm {
unsigned char *orig_title; /**< For restoring window title */
int verase; /**< Byte to map to KBD_BS, or -1 */
int title_codepage; /**< Codepage of terminal title */
unsigned int blocked:1; /**< Whether it was blocked */
unsigned int altscreen:1; /**< Whether to use alternate screen */
unsigned int touched_title:1; /**< Whether the term title was changed */

View File

@ -324,6 +324,11 @@ handle_trm(int std_in, int std_out, int sock_in, int sock_out, int ctl_in,
itrm->timer = TIMER_ID_UNDEF;
itrm->remote = !!remote;
/* If the master does not tell which charset it's using in
* this terminal, assume it's some ISO 8859, because that's
* what older versions of ELinks did. */
itrm->title_codepage = get_cp_index("ISO-8859-1");
/* FIXME: Combination altscreen + xwin does not work as it should,
* mouse clicks are reportedly partially ignored. */
if (info.system_env & (ENV_SCREEN | ENV_XWIN))
@ -415,7 +420,7 @@ free_itrm(struct itrm *itrm)
if (!itrm->remote) {
if (itrm->orig_title && *itrm->orig_title) {
set_window_title(itrm->orig_title);
set_window_title(itrm->orig_title, itrm->title_codepage);
} else if (itrm->touched_title) {
/* Set the window title to the value of $TERM if X11
@ -425,7 +430,8 @@ free_itrm(struct itrm *itrm)
get_terminal_name(title);
if (*title)
set_window_title(title);
set_window_title(title,
get_cp_index("US-ASCII"));
}
@ -498,7 +504,12 @@ dispatch_special(unsigned char *text)
ditrm->orig_title = get_window_title();
ditrm->touched_title = 1;
}
set_window_title(text + 1);
/* TODO: Is it really possible to get here with
* ditrm == NULL, and which charset would then
* be most appropriate? */
set_window_title(text + 1,
ditrm ? ditrm->title_codepage
: get_cp_index("US-ASCII"));
break;
case TERM_FN_RESIZE:
if (ditrm && ditrm->remote)
@ -506,6 +517,18 @@ dispatch_special(unsigned char *text)
resize_terminal_from_str(text + 1);
break;
case TERM_FN_TITLE_CODEPAGE:
if (ditrm) {
int cp = get_cp_index(text + 1);
/* If the master sends the name of an
* unrecognized charset, assume only
* that it's ASCII compatible. */
if (cp == -1)
cp = get_cp_index("US-ASCII");
ditrm->title_codepage = cp;
}
break;
}
}

View File

@ -372,12 +372,44 @@ do_terminal_function(struct terminal *term, unsigned char code,
fmem_free(x_data);
}
void
/** @return negative on error; zero or positive on success. */
int
set_terminal_title(struct terminal *term, unsigned char *title)
{
if (term->title && !strcmp(title, term->title)) return;
int from_cp;
int to_cp;
unsigned char *converted = NULL;
if (term->title && !strcmp(title, term->title)) return 0;
/* In which codepage was the title parameter given? */
from_cp = get_terminal_codepage(term);
/* In which codepage does the terminal want the title? */
if (get_opt_bool_tree(term->spec, "latin1_title"))
to_cp = get_cp_index("ISO-8859-1");
else if (get_opt_bool_tree(term->spec, "utf_8_io"))
to_cp = get_cp_index("UTF-8");
else
to_cp = from_cp;
if (from_cp != to_cp) {
struct conv_table *convert_table;
convert_table = get_translation_table(from_cp, to_cp);
if (!convert_table) return -1;
converted = convert_string(convert_table, title, strlen(title),
to_cp, CSM_NONE, NULL, NULL, NULL);
if (!converted) return -1;
}
mem_free_set(&term->title, stracpy(title));
do_terminal_function(term, TERM_FN_TITLE, title);
do_terminal_function(term, TERM_FN_TITLE_CODEPAGE,
get_cp_mime_name(to_cp));
do_terminal_function(term, TERM_FN_TITLE,
converted ? converted : title);
mem_free_if(converted);
return 0;
}
static int terminal_pipe[2];

View File

@ -185,12 +185,14 @@ void close_handle(void *);
void assert_terminal_ptr_not_dangling(const struct terminal *);
#endif
/** Operations that can be requested with do_terminal_function().
/** Operations that can be requested with do_terminal_function() in
* the master and then executed with dispatch_special() in a slave.
* The interlink protocol passes these values as one byte in a
* null-terminated string, so zero cannot be used. */
enum {
TERM_FN_TITLE = 1,
TERM_FN_RESIZE = 2
TERM_FN_TITLE = 1,
TERM_FN_RESIZE = 2,
TERM_FN_TITLE_CODEPAGE = 3
};
/** How to execute a program in a terminal. These values are used in
@ -211,7 +213,7 @@ enum term_exec {
void exec_on_terminal(struct terminal *, unsigned char *, unsigned char *, enum term_exec);
void exec_shell(struct terminal *term);
void set_terminal_title(struct terminal *, unsigned char *);
int set_terminal_title(struct terminal *, unsigned char *);
void do_terminal_function(struct terminal *, unsigned char, unsigned char *);
int check_terminal_pipes(void);