From cc9c620179b063a194e05574fef25fa576f142c2 Mon Sep 17 00:00:00 2001 From: Witold Filipczyk Date: Thu, 20 Nov 2008 15:53:35 +0100 Subject: [PATCH 1/9] pl.po: Statystyki -> Statystyka --- po/pl.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/po/pl.po b/po/pl.po index 28fcac44..8c5526f7 100644 --- a/po/pl.po +++ b/po/pl.po @@ -7663,7 +7663,7 @@ msgstr[2] "%u pozostałych" #. Statistics: #: src/protocol/bittorrent/dialogs.c:419 msgid "Statistics" -msgstr "Statystyki" +msgstr "Statystyka" #: src/protocol/bittorrent/dialogs.c:423 #, c-format From d668b3b6aa215a9ff6a501c2888faa85cdea44b6 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Wed, 24 Dec 2008 02:54:14 +0200 Subject: [PATCH 2/9] mouse: Exit cursor-routing mode when a link is clicked Before this patch, if you first moved the cursor to link X with move-cursor-up and similar actions, and then clicked link Y with the mouse, ELinks would activate link X, i.e. not the one you clicked. This happened because the NAVIGATE_CURSOR_ROUTING mode was left enabled and made ELinks ignore the doc_view->vs->current_link member that ELinks had updated according to the click. Make ELinks return the session to NAVIGATE_LINKWISE mode, so that the update takes effect. Reported by Paul B. Mahol. (cherry picked from commit 408641806937851f9cce354b356c39ab382c6f8b) --- NEWS | 2 ++ src/viewer/text/view.c | 1 + 2 files changed, 3 insertions(+) diff --git a/NEWS b/NEWS index 11ed89bc..ab36a8e7 100644 --- a/NEWS +++ b/NEWS @@ -242,6 +242,8 @@ To be released as 0.11.6. * major bug 1004: ignore locales when comparing HTML element names and similar strings, so e.g. 
``title'' matches ``TITLE'' even in the Turkish locale +* minor: clicking a link with the mouse activates that link, rather + than the one selected with move-cursor-* actions ELinks 0.11.5: -------------- diff --git a/src/viewer/text/view.c b/src/viewer/text/view.c index 407be69a..df7fa0eb 100644 --- a/src/viewer/text/view.c +++ b/src/viewer/text/view.c @@ -1167,6 +1167,7 @@ frame_ev_mouse(struct session *ses, struct document_view *doc_view, struct term_ enum frame_event_status status = FRAME_EVENT_REFRESH; doc_view->vs->current_link = link - doc_view->document->links; + ses->navigate_mode = NAVIGATE_LINKWISE; if (!link_is_textinput(link)) { From 25da8085b3c2d572fa2a00d343eb2cbb66609899 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Wed, 31 Dec 2008 20:06:49 +0000 Subject: [PATCH 3/9] Fix double-free crash if EOF immediately follows </MAP>. look_for_link() used to return 0 both when it found the closing </MAP> tag, and when it hit the end of the file. In the first case, it also added *menu to the memory_list; in the second case, it did not. The caller get_image_map() supposedly distinguished between these cases by checking whether pos >= eof, and freed *menu separately if so. However, if the </MAP> was at the very end of the HTML file, so that not even a newline followed it, then look_for_link() left pos == eof even though it had found the </MAP> and added *menu to the memory_list. This made get_image_map() misinterpret the result and mem_free(*menu) even though *menu had already been freed as part of the memory_list; thus the crash. To fix this, make look_for_link() return -1 instead of 0 if it hits EOF without finding the </MAP>. Then make get_image_map() check the return value instead of comparing pos to eof. And add a test case, although not an automated one. Alternatively, look_for_link() could have been changed to decrement pos between finding the </MAP> and returning 0. Then, the pos >= eof comparison in get_image_map() would have been false. 
That scheme would however have been a bit more difficult to understand and maintain, I think. Reported by Paul B. Mahol. (cherry picked from commit a2404407ce9b687ef7db12b520909c7f4aeffe91) --- NEWS | 1 + src/document/html/parser.c | 17 ++++++++++++----- test/imgmap2.html | 5 +++++ 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 test/imgmap2.html diff --git a/NEWS b/NEWS index ab36a8e7..8b4656f4 100644 --- a/NEWS +++ b/NEWS @@ -237,6 +237,7 @@ ELinks 0.11.5.GIT now: To be released as 0.11.6. +* critical: fix double-free crash if EOF immediately follows </MAP> * critical bug 1053: fix crash if a download finishes after ELinks has closed the terminal from which the download was started * major bug 1004: ignore locales when comparing HTML element names and diff --git a/src/document/html/parser.c b/src/document/html/parser.c index 4d8c1114..d9e911ab 100644 --- a/src/document/html/parser.c +++ b/src/document/html/parser.c @@ -615,6 +615,9 @@ look_for_tag(unsigned char **pos, unsigned char *eof, return 0; } +/** @return -1 if EOF is hit without the closing tag; 0 if the closing + * tag is found (in which case this also adds *@a menu to *@a ml); or + * 1 if this should be called again. */ static int look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu, struct memory_list **ml, struct uri *href_base, @@ -632,7 +635,7 @@ look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu, (*pos)++; } - if (*pos >= eof) return 0; + if (*pos >= eof) return -1; if (*pos + 2 <= eof && ((*pos)[1] == '!' 
|| (*pos)[1] == '?')) { *pos = skip_comment(*pos, eof); @@ -647,7 +650,7 @@ look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu, if (!c_strlcasecmp(name, namelen, "A", 1)) { while (look_for_tag(pos, eof, name, namelen, &label)); - if (*pos >= eof) return 0; + if (*pos >= eof) return -1; } else if (!c_strlcasecmp(name, namelen, "AREA", 4)) { /* FIXME (bug 784): options->cp is the terminal charset; @@ -765,6 +768,7 @@ get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof, { struct conv_table *ct; struct string hd; + int look_result; if (!init_string(&hd)) return -1; @@ -785,10 +789,13 @@ get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof, *ml = NULL; - while (look_for_link(&pos, eof, menu, ml, uri, target_base, ct, options)) - ; + do { + /* This call can modify both *ml and *menu. */ + look_result = look_for_link(&pos, eof, menu, ml, uri, + target_base, ct, options); + } while (look_result > 0); - if (pos >= eof) { + if (look_result < 0) { freeml(*ml); mem_free(*menu); return -1; diff --git a/test/imgmap2.html b/test/imgmap2.html new file mode 100644 index 00000000..e96e1846 --- /dev/null +++ b/test/imgmap2.html @@ -0,0 +1,5 @@ +Double-free crash in USEMAP +

+ +see this? + \ No newline at end of file From ad45176dde9bb1f2d37b427a530bc06d53d04ebb Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sun, 28 Dec 2008 17:42:29 +0200 Subject: [PATCH 4/9] Add get_terminal_codepage(). This simplifies the callers a little and may help implement simultaneous support for different charsets on different terminals of the same type (bug 1064). --- src/bfu/dialog.c | 2 +- src/bfu/hotkey.c | 2 +- src/bfu/inpfield.c | 3 +-- src/dialogs/options.c | 2 +- src/document/renderer.c | 2 +- src/intl/gettext/libintl.h | 2 +- src/protocol/http/codes.c | 3 +-- src/session/task.c | 2 +- src/terminal/event.c | 5 ++--- src/terminal/terminal.c | 18 ++++++++++++++++++ src/terminal/terminal.h | 1 + src/viewer/text/form.c | 7 +++---- src/viewer/text/link.c | 3 +-- 13 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/bfu/dialog.c b/src/bfu/dialog.c index 6694730d..b2df9f86 100644 --- a/src/bfu/dialog.c +++ b/src/bfu/dialog.c @@ -297,7 +297,7 @@ select_button_by_key(struct dialog_data *dlg_data) #ifdef CONFIG_UTF8 key = unicode_fold_label_case(get_kbd_key(ev)); - codepage = get_opt_codepage_tree(dlg_data->win->term->spec, "charset"); + codepage = get_terminal_codepage(dlg_data->win->term); #else key = toupper(get_kbd_key(ev)); #endif diff --git a/src/bfu/hotkey.c b/src/bfu/hotkey.c index c2622859..c09b6ed8 100644 --- a/src/bfu/hotkey.c +++ b/src/bfu/hotkey.c @@ -127,7 +127,7 @@ check_hotkeys_common(struct menu *menu, term_event_char_T hotkey, struct termina { #ifdef CONFIG_UTF8 unicode_val_T key = unicode_fold_label_case(hotkey); - int codepage = get_opt_codepage_tree(term->spec, "charset"); + int codepage = get_terminal_codepage(term); #else unsigned char key = toupper(hotkey); #endif diff --git a/src/bfu/inpfield.c b/src/bfu/inpfield.c index cab51d86..4c0dcd2e 100644 --- a/src/bfu/inpfield.c +++ b/src/bfu/inpfield.c @@ -691,8 +691,7 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) /* get_kbd_key(ev) is 
UCS-4, and @text * is in the terminal's charset. */ ins = u2cp_no_nbsp(get_kbd_key(ev), - get_opt_codepage_tree(term->spec, - "charset")); + get_terminal_codepage(term)); inslen = strlen(ins); #endif /* CONFIG_UTF8 */ diff --git a/src/dialogs/options.c b/src/dialogs/options.c index 75a59a8b..f40d07d6 100644 --- a/src/dialogs/options.c +++ b/src/dialogs/options.c @@ -50,7 +50,7 @@ charset_list(struct terminal *term, void *xxx, void *ses_) int i, items; int sel = 0; const unsigned char *const sel_mime = get_cp_mime_name( - get_opt_codepage_tree(term->spec, "charset")); + get_terminal_codepage(term)); struct menu_item *mi = new_menu(FREE_LIST); if (!mi) return; diff --git a/src/document/renderer.c b/src/document/renderer.c index ff303515..e860a924 100644 --- a/src/document/renderer.c +++ b/src/document/renderer.c @@ -455,7 +455,7 @@ render_document_frames(struct session *ses, int no_cache) if (!get_opt_bool_tree(ses->tab->term->spec, "underline")) doc_opts.color_flags |= COLOR_ENHANCE_UNDERLINE; - doc_opts.cp = get_opt_codepage_tree(ses->tab->term->spec, "charset"); + doc_opts.cp = get_terminal_codepage(ses->tab->term); doc_opts.no_cache = no_cache & 1; doc_opts.gradual_rerendering = !!(no_cache & 2); diff --git a/src/intl/gettext/libintl.h b/src/intl/gettext/libintl.h index 105ab3fa..ac9c68d0 100644 --- a/src/intl/gettext/libintl.h +++ b/src/intl/gettext/libintl.h @@ -61,7 +61,7 @@ extern int current_charset; static inline void intl_set_charset(struct terminal *term) { - int new_charset = get_opt_codepage_tree(term->spec, "charset"); + int new_charset = get_terminal_codepage(term); /* Prevent useless switching. 
*/ if (current_charset != new_charset) { diff --git a/src/protocol/http/codes.c b/src/protocol/http/codes.c index e0740eae..c4a260fb 100644 --- a/src/protocol/http/codes.c +++ b/src/protocol/http/codes.c @@ -171,8 +171,7 @@ show_http_error_document(struct session *ses, void *data) if (str) { /* The codepage that _("foo", term) used when it was * called by get_http_error_document. */ - const int gettext_codepage - = get_opt_codepage_tree(term->spec, "charset"); + const int gettext_codepage = get_terminal_codepage(term); if (cached) delete_entry_content(cache); diff --git a/src/session/task.c b/src/session/task.c index e800fec4..eb47431b 100644 --- a/src/session/task.c +++ b/src/session/task.c @@ -388,7 +388,7 @@ ses_imgmap(struct session *ses) &menu, &ml, ses->loading_uri, &doc_view->document->options, ses->task.target.frame, - get_opt_codepage_tree(ses->tab->term->spec, "charset"), + get_terminal_codepage(ses->tab->term), get_opt_codepage("document.codepage.assume"), get_opt_bool("document.codepage.force_assumed"))) return; diff --git a/src/terminal/event.c b/src/terminal/event.c index 2030244e..b9b7ba04 100644 --- a/src/terminal/event.c +++ b/src/terminal/event.c @@ -148,7 +148,7 @@ term_send_ucs(struct terminal *term, unicode_val_T u, const unsigned char *recoded; set_kbd_term_event(&ev, KBD_UNDEF, modifier); - recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset")); + recoded = u2cp_no_nbsp(u, get_terminal_codepage(term)); if (!recoded) recoded = "*"; while (*recoded) { ev.info.keyboard.key = *recoded; @@ -184,8 +184,7 @@ check_terminal_name(struct terminal *term, struct terminal_info *info) /* Probably not best place for set this. But now we finally have * term->spec and term->utf8 should be set before decode session info. * --Scrool */ - term->utf8_cp = is_cp_utf8(get_opt_codepage_tree(term->spec, - "charset")); + term->utf8_cp = is_cp_utf8(get_terminal_codepage(term)); /* Force UTF-8 I/O if the UTF-8 charset is selected. 
Various * places assume that the terminal's charset is unibyte if * UTF-8 I/O is disabled. (bug 827) */ diff --git a/src/terminal/terminal.c b/src/terminal/terminal.c index d01af67a..1cfa08fd 100644 --- a/src/terminal/terminal.c +++ b/src/terminal/terminal.c @@ -103,6 +103,24 @@ init_term(int fdin, int fdout) return term; } +/** Get the codepage of a terminal. The UTF-8 I/O option does not + * affect this. + * + * @todo Perhaps cache the value in struct terminal? + * + * @bug Bug 1064: If the charset has been set as "System", this should + * apply the locale environment variables of the slave ELinks process, + * not those of the master ELinks process that parsed the configuration + * file. That is why the parameter points to struct terminal and not + * merely to its option tree (term->spec). + * + * @see get_translation_table(), get_cp_mime_name() */ +int +get_terminal_codepage(const struct terminal *term) +{ + return get_opt_codepage_tree(term->spec, "charset"); +} + void redraw_all_terminals(void) { diff --git a/src/terminal/terminal.h b/src/terminal/terminal.h index ad01a30e..1b50ca3b 100644 --- a/src/terminal/terminal.h +++ b/src/terminal/terminal.h @@ -172,6 +172,7 @@ void destroy_terminal(struct terminal *); void redraw_terminal(struct terminal *term); void redraw_terminal_cls(struct terminal *term); void cls_redraw_all_terminals(void); +int get_terminal_codepage(const struct terminal *); void redraw_all_terminals(void); void destroy_all_terminals(void); diff --git a/src/viewer/text/form.c b/src/viewer/text/form.c index b21fe7ac..775e1d04 100644 --- a/src/viewer/text/form.c +++ b/src/viewer/text/form.c @@ -165,7 +165,7 @@ init_form_state(struct document_view *doc_view, doc_cp = doc_view->document->cp; term = doc_view->session->tab->term; - viewer_cp = get_opt_codepage_tree(term->spec, "charset"); + viewer_cp = get_terminal_codepage(term); mem_free_set(&fs->value, NULL); @@ -1248,7 +1248,7 @@ get_form_uri(struct session *ses, struct document_view *doc_view, 
get_successful_controls(doc_view, fc, &submit); - cp_from = get_opt_codepage_tree(ses->tab->term->spec, "charset"); + cp_from = get_terminal_codepage(ses->tab->term); cp_to = doc_view->document->cp; switch (form->method) { case FORM_METHOD_GET: @@ -1846,8 +1846,7 @@ field_op(struct session *ses, struct document_view *doc_view, #ifdef CONFIG_UTF8 /* fs->value is in the charset of the terminal. */ ctext = u2cp_no_nbsp(get_kbd_key(ev), - get_opt_codepage_tree(ses->tab->term->spec, - "charset")); + get_terminal_codepage(ses->tab->term)); length = strlen(ctext); if (strlen(fs->value) + length > fc->maxlength diff --git a/src/viewer/text/link.c b/src/viewer/text/link.c index 6bb04d87..543a5a86 100644 --- a/src/viewer/text/link.c +++ b/src/viewer/text/link.c @@ -1216,8 +1216,7 @@ try_document_key(struct session *ses, struct document_view *doc_view, #ifdef CONFIG_UTF8 key = get_kbd_key(ev); #else /* !CONFIG_UTF8 */ - key = cp2u(get_opt_codepage_tree(ses->tab->term->spec, - "charset"), + key = cp2u(get_terminal_codepage(ses->tab->term), get_kbd_key(ev)); #endif /* !CONFIG_UTF8 */ /* If @key now is 0 (which is used in link.accesskey if there From 8f4d7f99038612f265ccd4ef83469381512f95fd Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sat, 18 Oct 2008 13:51:04 +0300 Subject: [PATCH 5/9] Define cp_to_unicode() even without CONFIG_UTF8. And make its last parameter point to const. add_cp_html_to_string() no longer needs to pretend UTF-8 is ISO-8859-1. --- src/intl/charsets.c | 8 +++----- src/intl/charsets.h | 4 ++-- src/util/conv.c | 20 +++++--------------- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/intl/charsets.c b/src/intl/charsets.c index 7859ee04..d9ab65a7 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -254,7 +254,6 @@ encode_utf8(unicode_val_T u) return utf_buffer; } -#ifdef CONFIG_UTF8 /* Number of bytes utf8 character indexed by first byte. Illegal bytes are * equal ones and handled different. 
*/ static const char utf8char_len_tab[256] = { @@ -268,6 +267,7 @@ static const char utf8char_len_tab[256] = { 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1, }; +#ifdef CONFIG_UTF8 inline int utf8charlen(const unsigned char *p) { return p ? utf8char_len_tab[*p] : 0; @@ -630,6 +630,7 @@ unicode_fold_label_case(unicode_val_T c) return c; #endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */ } +#endif /* CONFIG_UTF8 */ inline unicode_val_T utf8_to_unicode(unsigned char **string, const unsigned char *end) @@ -714,7 +715,6 @@ invalid_utf8: *string = str + length; return u; } -#endif /* CONFIG_UTF8 */ /* The common part of cp2u and cp2utf_8. */ static unicode_val_T @@ -753,9 +753,8 @@ cp2utf8(int from, int c) return encode_utf8(cp2u_shared(&codepages[from], c)); } -#ifdef CONFIG_UTF8 unicode_val_T -cp_to_unicode(int codepage, unsigned char **string, unsigned char *end) +cp_to_unicode(int codepage, unsigned char **string, const unsigned char *end) { unicode_val_T ret; @@ -769,7 +768,6 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end) ++*string; return ret; } -#endif /* CONFIG_UTF8 */ static void diff --git a/src/intl/charsets.h b/src/intl/charsets.h index ac7e067e..62223136 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -144,9 +144,9 @@ unsigned char *utf8_step_backward(unsigned char *, unsigned char *, inline int unicode_to_cell(unicode_val_T); unicode_val_T unicode_fold_label_case(unicode_val_T); inline int strlen_utf8(unsigned char **); -inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *); -unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *); #endif /* CONFIG_UTF8 */ +inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *); +unicode_val_T cp_to_unicode(int, unsigned char **, const unsigned char *); unicode_val_T cp2u(int, unsigned char); const unsigned char *cp2utf8(int, int); diff --git a/src/util/conv.c b/src/util/conv.c index 7ca0dc63..1bf78774 
100644 --- a/src/util/conv.c +++ b/src/util/conv.c @@ -313,21 +313,11 @@ add_cp_html_to_string(struct string *string, int src_codepage, const unsigned char *const end = src + len; unicode_val_T unicode; - while (src != end) { - if (is_cp_utf8(src_codepage)) { -#ifdef CONFIG_UTF8 - unicode = utf8_to_unicode((unsigned char **) &src, - end); - if (unicode == UCS_NO_CHAR) - break; -#else /* !CONFIG_UTF8 */ - /* Cannot parse UTF-8 without CONFIG_UTF8. - * Pretend the input is ISO-8859-1 instead. */ - unicode = *src++; -#endif /* !CONFIG_UTF8 */ - } else { - unicode = cp2u(src_codepage, *src++); - } + for (;;) { + unicode = cp_to_unicode(src_codepage, + (unsigned char **) &src, end); + if (unicode == UCS_NO_CHAR) + break; if (unicode < 0x20 || unicode >= 0x7F || unicode == '<' || unicode == '>' || unicode == '&' From b6dfdf86a6f06db8e8fb2da909777ab04c5a870e Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Mon, 29 Dec 2008 03:09:53 +0200 Subject: [PATCH 6/9] Bug 885: Proper charset support in xterm window title When ELinks runs in an X11 terminal emulator (e.g. xterm), or in GNU Screen, it tries to update the title of the window to match the title of the current document. To do this, ELinks sends an "OSC 1 ; Pt BEL" sequence to the terminal. Unfortunately, xterm expects the Pt string to be in the ISO-8859-1 charset, making it impossible to display e.g. Cyrillic characters. In xterm patch #210 (2006-03-12) however, there is a menu item and a resource that can make xterm take the Pt string in UTF-8 instead, allowing characters from all around the world. The downside is that ELinks apparently cannot ask xterm whether the setting is on or off; so add a terminal._template_.latin1_title option to ELinks and let the user edit that instead. Complete list of changes: - Add the terminal._template_.latin1_title option. But do not add that to the terminal options window because it's already rather crowded there. - In set_window_title(), take a new codepage argument. 
Use it to decode the title into Unicode characters, and remove only actual control characters. For example, CP437 has graphical characters in the 0x80...0x9F range, so don't remove those, even though ISO-8859-1 has control characters in the same range. Likewise, don't misinterpret single bytes of UTF-8 characters as control characters. - In set_window_title(), do not truncate the title to the width of the window. The font is likely to be different and proportional anyway. But do truncate before 1024 bytes, an xterm limit. - In struct itrm, add a title_codepage member to remember which charset the master said it was going to use in the terminal window title. Initialize title_codepage in handle_trm(), update it in dispatch_special() if the master sends the new request TERM_FN_TITLE_CODEPAGE, and use it in most set_window_title() calls; but not in the one that sets $TERM as the title, because that string was not received from the master and should consist of ASCII characters only. - In set_terminal_title(), convert the caller-provided title to ISO-8859-1 or UTF-8 if appropriate, and report the codepage to the slave with the new TERM_FN_TITLE_CODEPAGE request. The conversion can run out of memory, so return a success/error flag, rather than void. In display_window_title(), check this result and don't update caches on error. - Add a NEWS entry for all of this. --- NEWS | 4 +++ src/config/options.inc | 13 ++++++++ src/dialogs/status.c | 10 +++--- src/osdep/os2/os2.c | 2 +- src/osdep/osdep.c | 72 ++++++++++++++++++++--------------------- src/osdep/osdep.h | 2 +- src/terminal/itrm.h | 1 + src/terminal/kbd.c | 29 +++++++++++++++-- src/terminal/terminal.c | 38 ++++++++++++++++++++-- src/terminal/terminal.h | 10 +++--- 10 files changed, 127 insertions(+), 54 deletions(-) diff --git a/NEWS b/NEWS index 8b4656f4..24b78b63 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,10 @@ includes the changes listed under ``ELinks 0.11.5.GIT'' below. 
Previously, they could turn into spaces or disappear entirely. * Perl scripts can use modules that dynamically load C libraries, like XML::LibXML::SAX does. +* bug 885: Convert xterm titles to ISO-8859-1 by default, but add an + option to disable this. When removing control characters from a + title, note the charset. Don't truncate titles to the width of the + terminal. * enhancement: Updated ISO 8859-7, ISO 8859-16, KOI8-R, and MacRoman. ELinks 0.12pre2: diff --git a/src/config/options.inc b/src/config/options.inc index ad96f26e..0c508ae4 100644 --- a/src/config/options.inc +++ b/src/config/options.inc @@ -847,6 +847,19 @@ static struct option_info config_options_info[] = { "3 is KOI-8\n" "4 is FreeBSD")), + INIT_OPT_BOOL("terminal._template_", N_("Always encode xterm title in ISO-8859-1"), + "latin1_title", 0, 1, + N_("When updating the window title of xterm or a similar " + "terminal emulator, encode the title in ISO-8859-1 (Latin-1), " + "rather than in the charset used for other text in the window. " + "Cyrillic and other characters get replaced with Latin ones. 
" + "Xterm requires this unless you explicitly enable UTF-8 " + "titles in it.\n" + "\n" + "If this option does not take effect immediately, try switching " + "to a different page so that ELinks notices it needs to update " + "the title.")), + INIT_OPT_BOOL("terminal._template_", N_("Switch fonts for line drawing"), "m11_hack", 0, 0, N_("Switch fonts when drawing lines, enabling both local characters\n" diff --git a/src/dialogs/status.c b/src/dialogs/status.c index 908ad0b2..b41b0955 100644 --- a/src/dialogs/status.c +++ b/src/dialogs/status.c @@ -489,12 +489,12 @@ display_window_title(struct session *ses, struct terminal *term) if (!title) return; titlelen = strlen(title); - if (last_ses != ses - || !status->last_title - || strlen(status->last_title) != titlelen - || memcmp(status->last_title, title, titlelen)) { + if ((last_ses != ses + || !status->last_title + || strlen(status->last_title) != titlelen + || memcmp(status->last_title, title, titlelen)) + && set_terminal_title(term, title) >= 0) { mem_free_set(&status->last_title, title); - set_terminal_title(term, title); last_ses = ses; } else { mem_free(title); diff --git a/src/osdep/os2/os2.c b/src/osdep/os2/os2.c index 20d6d9f4..16875c14 100644 --- a/src/osdep/os2/os2.c +++ b/src/osdep/os2/os2.c @@ -303,7 +303,7 @@ get_window_title(void) } void -set_window_title(unsigned char *title) +set_window_title(unsigned char *title, int codepage) { #ifndef DEBUG_OS2 static PTIB tib; diff --git a/src/osdep/osdep.c b/src/osdep/osdep.c index 74eab68a..e26463e3 100644 --- a/src/osdep/osdep.c +++ b/src/osdep/osdep.c @@ -407,73 +407,71 @@ set_clipboard_text(unsigned char *data) /* Set xterm-like term window's title. */ void -set_window_title(unsigned char *title) +set_window_title(unsigned char *title, int codepage) { - unsigned char *s; - int xsize, ysize; - int j = 0; + struct string filtered; #ifndef HAVE_SYS_CYGWIN_H /* Check if we're in a xterm-like terminal. 
*/ if (!is_xterm() && !is_gnuscreen()) return; #endif - /* Retrieve terminal dimensions. */ - get_terminal_size(0, &xsize, &ysize); + if (!init_string(&filtered)) return; - /* Check if terminal width is reasonnable. */ - if (xsize < 1 || xsize > 1024) return; - - /* Allocate space for title + 3 ending points + null char. */ - s = mem_alloc(xsize + 3 + 1); - if (!s) return; - - /* Copy title to s if different from NULL */ + /* Copy title to filtered if different from NULL */ if (title) { - int i; + unsigned char *scan = title; + unsigned char *end = title + strlen(title); - /* We limit title length to terminal width and ignore control - * chars if any. Note that in most cases window decoration - * reduces printable width, so it's just a precaution. */ + /* Remove control characters, so that they cannot + * interfere with the command we send to the terminal. + * However, do not attempt to limit the title length + * to terminal width, because the title is usually + * drawn in a different font anyway. */ /* Note that this is the right place where to do it, since * potential alternative set_window_title() routines might * want to take different precautions. */ - for (i = 0; title[i] && i < xsize; i++) { - /* 0x80 .. 0x9f are ISO-8859-* control characters. - * In some other encodings they could be used for - * legitimate characters, though (ie. in Kamenicky). - * We should therefore maybe check for these only - * if the terminal is running in an ISO- encoding. */ - if (iscntrl(title[i]) || (title[i] & 0x7f) < 0x20 - || title[i] == 0x7f) + for (;;) { + unsigned char *charbegin = scan; + unicode_val_T unicode + = cp_to_unicode(codepage, &scan, end); + int charlen = scan - charbegin; + + if (unicode == UCS_NO_CHAR) + break; + + /* This need not recognize all Unicode control + * characters. Only those that can make the + * terminal misparse the command. 
*/ + if (unicode < 0x20 + || (unicode >= 0x7F && unicode < 0xA0)) continue; - s[j++] = title[i]; - } + /* xterm entirely rejects 1024-byte or longer + * titles. */ + if (filtered.length + charlen >= 1024 - 3) { + add_to_string(&filtered, "..."); + break; + } - /* If title is truncated, add "..." */ - if (i == xsize) { - s[j++] = '.'; - s[j++] = '.'; - s[j++] = '.'; + add_bytes_to_string(&filtered, charbegin, charlen); } } - s[j] = '\0'; /* Send terminal escape sequence + title string */ - printf("\033]0;%s\a", s); + printf("\033]0;%s\a", filtered.source); #if 0 /* Miciah don't like this so it is disabled because it changes the * default window name. --jonas */ /* Set the GNU screen window name */ if (is_gnuscreen()) - printf("\033k%s\033\134", s); + printf("\033k%s\033\134", filtered.source); #endif fflush(stdout); - mem_free(s); + done_string(&filtered); } #ifdef HAVE_X11 diff --git a/src/osdep/osdep.h b/src/osdep/osdep.h index 721916fd..15cefc3b 100644 --- a/src/osdep/osdep.h +++ b/src/osdep/osdep.h @@ -38,7 +38,7 @@ void resume_mouse(void *); int start_thread(void (*)(void *, int), void *, int); unsigned char *get_clipboard_text(void); void set_clipboard_text(unsigned char *); -void set_window_title(unsigned char *); +void set_window_title(unsigned char *, int codepage); unsigned char *get_window_title(void); void block_stdin(void); void unblock_stdin(void); diff --git a/src/terminal/itrm.h b/src/terminal/itrm.h index 30e6375e..63163b44 100644 --- a/src/terminal/itrm.h +++ b/src/terminal/itrm.h @@ -101,6 +101,7 @@ struct itrm { unsigned char *orig_title; /**< For restoring window title */ int verase; /**< Byte to map to KBD_BS, or -1 */ + int title_codepage; /**< Codepage of terminal title */ unsigned int blocked:1; /**< Whether it was blocked */ unsigned int altscreen:1; /**< Whether to use alternate screen */ unsigned int touched_title:1; /**< Whether the term title was changed */ diff --git a/src/terminal/kbd.c b/src/terminal/kbd.c index d9ffde75..af37ebd1 
100644 --- a/src/terminal/kbd.c +++ b/src/terminal/kbd.c @@ -324,6 +324,11 @@ handle_trm(int std_in, int std_out, int sock_in, int sock_out, int ctl_in, itrm->timer = TIMER_ID_UNDEF; itrm->remote = !!remote; + /* If the master does not tell which charset it's using in + * this terminal, assume it's some ISO 8859. Because that's + * what older versions of ELinks did. */ + itrm->title_codepage = get_cp_index("ISO-8859-1"); + /* FIXME: Combination altscreen + xwin does not work as it should, * mouse clicks are reportedly partially ignored. */ if (info.system_env & (ENV_SCREEN | ENV_XWIN)) @@ -415,7 +420,7 @@ free_itrm(struct itrm *itrm) if (!itrm->remote) { if (itrm->orig_title && *itrm->orig_title) { - set_window_title(itrm->orig_title); + set_window_title(itrm->orig_title, itrm->title_codepage); } else if (itrm->touched_title) { /* Set the window title to the value of $TERM if X11 @@ -425,7 +430,8 @@ free_itrm(struct itrm *itrm) get_terminal_name(title); if (*title) - set_window_title(title); + set_window_title(title, + get_cp_index("US-ASCII")); } @@ -498,7 +504,12 @@ dispatch_special(unsigned char *text) ditrm->orig_title = get_window_title(); ditrm->touched_title = 1; } - set_window_title(text + 1); + /* TODO: Is it really possible to get here with + * ditrm == NULL, and which charset would then + * be most appropriate? */ + set_window_title(text + 1, + ditrm ? ditrm->title_codepage + : get_cp_index("US-ASCII")); break; case TERM_FN_RESIZE: if (ditrm && ditrm->remote) @@ -506,6 +517,18 @@ dispatch_special(unsigned char *text) resize_terminal_from_str(text + 1); break; + case TERM_FN_TITLE_CODEPAGE: + if (ditrm) { + int cp = get_cp_index(text + 1); + + /* If the master sends the name of an + * unrecognized charset, assume only + * that it's ASCII compatible. 
*/ + if (cp == -1) + cp = get_cp_index("US-ASCII"); + ditrm->title_codepage = cp; + } + break; } } diff --git a/src/terminal/terminal.c b/src/terminal/terminal.c index 1cfa08fd..034cb497 100644 --- a/src/terminal/terminal.c +++ b/src/terminal/terminal.c @@ -372,12 +372,44 @@ do_terminal_function(struct terminal *term, unsigned char code, fmem_free(x_data); } -void +/** @return negative on error; zero or positive on success. */ +int set_terminal_title(struct terminal *term, unsigned char *title) { - if (term->title && !strcmp(title, term->title)) return; + int from_cp; + int to_cp; + unsigned char *converted = NULL; + + if (term->title && !strcmp(title, term->title)) return 0; + + /* In which codepage was the title parameter given? */ + from_cp = get_terminal_codepage(term); + + /* In which codepage does the terminal want the title? */ + if (get_opt_bool_tree(term->spec, "latin1_title")) + to_cp = get_cp_index("ISO-8859-1"); + else if (get_opt_bool_tree(term->spec, "utf_8_io")) + to_cp = get_cp_index("UTF-8"); + else + to_cp = from_cp; + + if (from_cp != to_cp) { + struct conv_table *convert_table; + + convert_table = get_translation_table(from_cp, to_cp); + if (!convert_table) return -1; + converted = convert_string(convert_table, title, strlen(title), + to_cp, CSM_NONE, NULL, NULL, NULL); + if (!converted) return -1; + } + mem_free_set(&term->title, stracpy(title)); - do_terminal_function(term, TERM_FN_TITLE, title); + do_terminal_function(term, TERM_FN_TITLE_CODEPAGE, + get_cp_mime_name(to_cp)); + do_terminal_function(term, TERM_FN_TITLE, + converted ? 
converted : title); + mem_free_if(converted); + return 0; } static int terminal_pipe[2]; diff --git a/src/terminal/terminal.h b/src/terminal/terminal.h index 1b50ca3b..c2c1d79f 100644 --- a/src/terminal/terminal.h +++ b/src/terminal/terminal.h @@ -185,12 +185,14 @@ void close_handle(void *); void assert_terminal_ptr_not_dangling(const struct terminal *); #endif -/** Operations that can be requested with do_terminal_function(). +/** Operations that can be requested with do_terminal_function() in + * the master and then executed with dispatch_special() in a slave. * The interlink protocol passes these values as one byte in a * null-terminated string, so zero cannot be used. */ enum { - TERM_FN_TITLE = 1, - TERM_FN_RESIZE = 2 + TERM_FN_TITLE = 1, + TERM_FN_RESIZE = 2, + TERM_FN_TITLE_CODEPAGE = 3 }; /** How to execute a program in a terminal. These values are used in @@ -211,7 +213,7 @@ enum term_exec { void exec_on_terminal(struct terminal *, unsigned char *, unsigned char *, enum term_exec); void exec_shell(struct terminal *term); -void set_terminal_title(struct terminal *, unsigned char *); +int set_terminal_title(struct terminal *, unsigned char *); void do_terminal_function(struct terminal *, unsigned char, unsigned char *); int check_terminal_pipes(void); From 5be3f71ddd55b1cd15f6dc48a10f9f8bb8151b2f Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Thu, 1 Jan 2009 18:35:11 +0000 Subject: [PATCH 7/9] Add test/image.png and use it in test/imgmap.html. This makes the image-map test work sensibly in graphical browsers too. 
--- test/image.png | Bin 0 -> 2127 bytes test/imgmap.html | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 test/image.png diff --git a/test/image.png b/test/image.png new file mode 100644 index 0000000000000000000000000000000000000000..0d09732b56dd32f377f2c40abec9303d53edeebe GIT binary patch literal 2127 zcmV-V2(b5wP)Px#2XskIMF-gd0TK=YiXX$20000YbVXQnLvL+uWo~o;Lvm$dbY)~9cWHEJAV*0} zP#|w^AYmX)VPl7=cS8UG2gpf8K~z}7&6s&?6xSWcKkv=#?BU(DgAMrPFo!XuV3orH znyP;wlBTtaXe0F!NR^tXYE(t7s;ZZ&M6LKIt*Q`JRh2^}Bq2R$0!38RiqcjFETzOI zP=g5s3~MhozV@8^^$+hZ#<;~e1o=tx$BgF9`@G-zodQ&KMTP}GngJ-Lc53p#t*;w%AyqN*Yxt=6* z9#EfaKs(!7wEAje(a5;FYX2Wj9hGwpPSg1D$e43BZ~8CpY-{<7KF8wisu0xVOa%}E zzNOCrt3~5c`B*0BzbFL)RmyMyWd1K;UF$Xjs4d*uiO%-^dj+Nx>}+dUV+1&y%J@rz zhQRYC3VC6qfN$#4Kt+Kqz>>mMQv|GQ-F6w0_XN&AEfIol+d%^`3`saB7#eYlul@eu z;jV-}#ab(Y?ZB6S#gnX89QH?rGp~3!?E>-w1)31Buqr_F@(6wX7DK~sah*M-GE;zC z4}1^UjF7c~NDLVWBK1IN+n!??AH{;%b1uV2Y?LPu0zyH_+Eo!6YC|N4Z2A&bvA?`< z@Nm}vfIbQChalRtV2D6j1x9QhS|ErF7zI8b-?o6WcMay-igcGty2C+v5D7_^Gz3{v zA0iUg0O(6tIIaR1Ie571+mj1*C9oSrn;tGBG_Qt0I939?**_&(FkDh3cLO*0hKZG& zP%!fGY!1ml%=T-0+E$#a<*i3Rs< ziI)K(U{PI=4QrzaA&Q$+dDHhl?6WW}Un|s?zNvs;M95mBtQ;fyzzoxnf?T?TB~R#h z83+UftCvSuvGg_%$aL0ablfdI${sx2_2g8*^-|L@VwJbKb-Ho>-5odG=Q^09*U8_w zW&_6(o$YOf;=2&I3Ir-Ebk;OQSWs!)Cf5Mff8EA$Rq?^G>C{ZE&@}W=+1=~9oAV4{ zNnPu<+MR7JUuGiJ0|HpJEL?({CU(WrhS1m~SFGIG)}q}mw}hCH^==oQ5~-Kw1((ND zJAqe<4?-cq#Xgf<-a{!hIrd5;9@V1r=jrLf?P}oJ$&mL{&GhFY-N2dRecn_gha8Um zbClEPvRu59CzJK2s_Vc*o619xu=EQ3_p`u)TLG2AckEfTEFa%3&9ic@ei8UIa0vJt zkOOc%xbUG#cTbj+@1?nN&Ac_vq%gLukNa{W5*vZNrT?FJedFUo$e11~E7iFf19~oY zW)wG7@#r7485(g28IrPyCMwEx zk;-_aGoA!0nwzSRXz}_+1Jowpb7E3A(8DwPom8%aIhw@DjM=~`U~f8cX<%1uYZUm6 zCbjQNAwOlhW_@+6@`+SW?vQcWc-C6%`~%nsRQWz4oAbGrw8)!2QVJyn6uL?k;Arvs z#zCM4Uu{sHgOD0h>i^S{O8IzJ9y>jZmAV1GKL|XZ=xjd>ur9dhXTWY0(b{;GwlF%6 zD`QDLYJ|V;`CjF#r%(R2xv6?Ta1F4eWU=oP7)yB!4BF!Qup z>oj?G4&QSTQbQn0pc7)j#L1!ql1^Mo?}}}G9C!psFRyHv|47qj zy(V78jqEsy@gd=(Hf;zmt^R84;Rk=({>Cqxo2vh;6mRJo^P^?jeAD#9N+Al_>;kTs 
zR&GgIJ0Tc?H|w18@f-`!Hu21?4fFzk1@gg`0Y+VO@)lz1AMJAQXbn_->fgItVfDl846U>w2o@I0f$IfTnK>=|;`Q`qg^#f+a!5k-m zwcr$MzPB#8DDNw^eca4OE!Wm2;!(Y?VDr9o$H;8I{a>%6VPFw1Z>|6U002ovPDHLk FV1m&36AS -ImageMap +ImageMap -1 -2 -3 -4 +1 +2 +3 +4 From dc41f0bd4ce759fbfb3fae7f56e4051a5bad59a6 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Thu, 1 Jan 2009 18:36:34 +0000 Subject: [PATCH 8/9] test: Don't refer to deleted files from imgmap.html. align.html and poocs.net.html have been deleted. Point the links to href_tests.html and nbsp.html instead. --- test/imgmap.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/imgmap.html b/test/imgmap.html index 7f6873b1..99015d25 100644 --- a/test/imgmap.html +++ b/test/imgmap.html @@ -2,10 +2,10 @@ ImageMap -1 +1 2 3 -4 +4 From 29c34df62e086c19b24ad34ec927d99f5998212d Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Thu, 1 Jan 2009 18:38:07 +0000 Subject: [PATCH 9/9] Fix assertion failure if IMG/@usemap refers to a different file. Change test/imgmap2.html so it can be used for testing this too. Debian Iceweasel 3.0.4 does not appear to support such external client-side image maps. Well, that's one place where ELinks is superior, I guess. There might be a security problem though if ELinks were to let scripts of the referring page examine the links in the image map. --- NEWS | 2 ++ src/session/session.c | 23 ++++++++++++++++------- test/imgmap2.html | 11 ++++++----- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index 24b78b63..d52363b2 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,8 @@ ELinks 0.12pre2.GIT now: To be released as 0.12pre3, 0.12rc1, or even 0.12.0. This branch also includes the changes listed under ``ELinks 0.11.5.GIT'' below. +* critical: Fix assertion failure if IMG/@usemap refers to a different + file. * Preserve newlines in hidden input fields, and submit them as CRLF. Previously, they could turn into spaces or disappear entirely. 
* Perl scripts can use modules that dynamically load C libraries, like diff --git a/src/session/session.c b/src/session/session.c index fd7d042e..99a6ddc7 100644 --- a/src/session/session.c +++ b/src/session/session.c @@ -518,17 +518,23 @@ maybe_pre_format_html(struct cache_entry *cached, struct session *ses) * were 0, it could then be freed, and the * cached->preformatted assignment at the end of this function * would crash. Normally, the document has a reference to the - * cache entry, and that suffices. If the following assertion - * ever fails, object_lock(cached) and object_unlock(cached) - * must be added to this function. */ - assert(cached->object.refcount > 0); - if_assert_failed return; + * cache entry, and that suffices. However, if the cache + * entry was loaded to satisfy e.g. USEMAP="imgmap.html#map", + * then cached->object.refcount == 0 here, and must be + * incremented. + * + * cached->object.refcount == 0 is safe while the cache entry + * is being loaded, because garbage_collection() calls + * is_entry_used(), which checks whether any connection is + * using the cache entry. But loading has ended before this + * point. */ + object_lock(cached); fragment = get_cache_fragment(cached); - if (!fragment) return; + if (!fragment) goto unlock_and_return; /* We cannot do anything if the data are fragmented. */ - if (!list_is_singleton(cached->frag)) return; + if (!list_is_singleton(cached->frag)) goto unlock_and_return; set_event_id(pre_format_html_event, "pre-format-html"); trigger_event(pre_format_html_event, ses, cached); @@ -536,6 +542,9 @@ maybe_pre_format_html(struct cache_entry *cached, struct session *ses) /* XXX: Keep this after the trigger_event, because hooks might call * normalize_cache_entry()! 
*/ cached->preformatted = 1; + +unlock_and_return: + object_unlock(cached); } #endif diff --git a/test/imgmap2.html b/test/imgmap2.html index e96e1846..7a503feb 100644 --- a/test/imgmap2.html +++ b/test/imgmap2.html @@ -1,5 +1,6 @@ -Double-free crash in USEMAP -

- -see this? - \ No newline at end of file +Crashes in client-side image maps +

ImageMap in another file

+

ImageMap at the very end of this file

+ +see this? +