mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Bug 824: Disable combining characters unless --enable-combining.
Label this as an experimental feature because it has so many bugs and it is not clear how they can be fixed.
This commit is contained in:
parent
14d1a0f3e2
commit
f19c948ca7
@ -305,7 +305,7 @@ AC_CHECK_FUNCS(setenv putenv, HAVE_SETENV_OR_PUTENV=yes)
|
||||
AC_CHECK_FUNCS(getuid, HAVE_GETUID=yes)
|
||||
AC_CHECK_FUNCS(geteuid, HAVE_GETEUID=yes)
|
||||
|
||||
AC_CHECK_FUNCS(wcwidth)
|
||||
AC_CHECK_FUNCS(wcwidth, HAVE_WCWIDTH=yes)
|
||||
|
||||
dnl These probably aren't needed now, as they are commented in links.h.
|
||||
dnl I've no idea about their historical background, but I keep them here
|
||||
@ -1338,6 +1338,9 @@ EL_ARG_ENABLE(CONFIG_SMALL, small, [Small binary],
|
||||
EL_ARG_ENABLE(CONFIG_UTF8, utf-8, [UTF-8],
|
||||
[ --disable-utf-8 disable UTF-8 support])
|
||||
|
||||
EL_ARG_DEPEND(CONFIG_COMBINE, combining, [CONFIG_UTF8:yes HAVE_WCWIDTH:yes], [Combining characters],
|
||||
[ --enable-combining support Unicode combining characters (experimental)])
|
||||
|
||||
|
||||
AC_ARG_ENABLE(weehoofooboomookerchoo,
|
||||
[
|
||||
|
@ -621,7 +621,7 @@ CONFIG_SMALL=no
|
||||
# support for double-width characters (like Japanese, etc.).
|
||||
#
|
||||
# Some features of Unicode are not handled at all. Combining characters are the
|
||||
# most visible absence.
|
||||
# most visible absence; but see CONFIG_COMBINE below.
|
||||
# Some features are partially supported. Like line breaking between
|
||||
# double-width characters. There is no other detection for determining when to
|
||||
# break or not.
|
||||
@ -633,6 +633,42 @@ CONFIG_SMALL=no
|
||||
CONFIG_UTF8=yes
|
||||
|
||||
|
||||
### Unicode combining characters support
|
||||
#
|
||||
# Extends CONFIG_UTF8 with spotty support for combining characters
|
||||
# such as U+0303 COMBINING TILDE.
|
||||
#
|
||||
# This feature is experimental and has been filed as enhancement 824.
|
||||
# Known bugs and weaknesses:
|
||||
#
|
||||
# - It assumes wcwidth(wc)==0 means wc is a combining character.
|
||||
# However, wcwidth also returns 0 for various control characters
|
||||
# (e.g. U+200E LEFT-TO-RIGHT MARK), and apparently returns -1 if
|
||||
# LC_CTYPE does not support the wide character. Besides, wchar_t
|
||||
# might not be Unicode at all. ELinks should instead use Unicode
|
||||
# character properties, perhaps via ICU.
|
||||
#
|
||||
# - It assumes all combining characters are nonspacing.
|
||||
#
|
||||
# - It works only if the terminal is using the UTF-8 charset.
|
||||
#
|
||||
# - It allocates an internal code for each different combining
|
||||
# character sequence. A malicious web page could easily use up all
|
||||
# the available codes, and the ELinks process would thenceforth be
|
||||
# unable to display any new sequences.
|
||||
#
|
||||
# - It does not understand canonical equivalences.
|
||||
#
|
||||
# - Combining characters work only in HTML text. They do not work in
|
||||
# HTML forms, HTML links, HTML document titles, plain text, menus,
|
||||
# dialog boxes, or keymaps.
|
||||
#
|
||||
# - Combining characters at the end of the document do not take effect.
|
||||
#
|
||||
# Default: disabled
|
||||
|
||||
CONFIG_COMBINE=no
|
||||
|
||||
|
||||
### Back-trace Printing
|
||||
#
|
||||
|
@ -864,10 +864,12 @@ static struct option_info config_options_info[] = {
|
||||
"only the subset of UTF-8 according to terminal codepage is used.\n"
|
||||
"ELinks ignores this option if the terminal codepage is UTF-8.")),
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
INIT_OPT_BOOL("terminal._template_", N_("Combining characters"),
|
||||
"combine", 0, 0,
|
||||
N_("Enable combining characters. It works only with "
|
||||
"the xterm in UTF-8 mode.")),
|
||||
#endif
|
||||
|
||||
INIT_OPT_BOOL("terminal._template_", N_("Restrict frames in cp850/852"),
|
||||
"restrict_852", 0, 0,
|
||||
|
@ -91,7 +91,9 @@ enum termopt {
|
||||
TERM_OPT_UTF_8_IO,
|
||||
TERM_OPT_TRANSPARENCY,
|
||||
TERM_OPT_UNDERLINE,
|
||||
#ifdef CONFIG_COMBINE
|
||||
TERM_OPT_COMBINE,
|
||||
#endif
|
||||
|
||||
TERM_OPTIONS,
|
||||
};
|
||||
@ -105,7 +107,9 @@ static struct option_resolver resolvers[] = {
|
||||
{ TERM_OPT_TRANSPARENCY, "transparency" },
|
||||
{ TERM_OPT_UTF_8_IO, "utf_8_io" },
|
||||
{ TERM_OPT_UNDERLINE, "underline" },
|
||||
#ifdef CONFIG_COMBINE
|
||||
{ TERM_OPT_COMBINE, "combine" },
|
||||
#endif
|
||||
};
|
||||
|
||||
static widget_handler_status_T
|
||||
@ -151,7 +155,7 @@ push_save_button(struct dialog_data *dlg_data, struct widget_data *button)
|
||||
#define RADIO_TRUE 0
|
||||
#endif
|
||||
|
||||
#define TERMOPT_WIDGETS_COUNT (20 + RADIO_88 + RADIO_256 + RADIO_TRUE)
|
||||
#define TERMOPT_WIDGETS_COUNT (12 + TERM_OPTIONS + RADIO_88 + RADIO_256 + RADIO_TRUE)
|
||||
|
||||
#define TERM_OPTION_VALUE_SIZE (sizeof(union option_value) * TERM_OPTIONS)
|
||||
|
||||
@ -232,7 +236,9 @@ terminal_options(struct terminal *term, void *xxx, struct session *ses)
|
||||
add_dlg_checkbox(dlg, _("Transparency", term), &values[TERM_OPT_TRANSPARENCY].number);
|
||||
add_dlg_checkbox(dlg, _("Underline", term), &values[TERM_OPT_UNDERLINE].number);
|
||||
add_dlg_checkbox(dlg, _("UTF-8 I/O", term), &values[TERM_OPT_UTF_8_IO].number);
|
||||
#ifdef CONFIG_COMBINE
|
||||
add_dlg_checkbox(dlg, _("Combining characters", term), &values[TERM_OPT_COMBINE].number);
|
||||
#endif
|
||||
|
||||
add_dlg_button(dlg, _("~OK", term), B_ENTER, push_ok_button, NULL);
|
||||
if (!anonymous)
|
||||
|
@ -57,7 +57,7 @@ init_document(struct cache_entry *cached, struct document_options *options)
|
||||
init_list(document->onload_snippets);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UTF8
|
||||
#ifdef CONFIG_COMBINE
|
||||
document->comb_x = -1;
|
||||
document->comb_y = -1;
|
||||
#endif
|
||||
|
@ -201,6 +201,8 @@ struct document {
|
||||
#ifdef CONFIG_UTF8
|
||||
unsigned char buf[7];
|
||||
unsigned char buf_length;
|
||||
#endif
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* base char + 5 combining chars = 6 */
|
||||
unicode_val_T combi[UCS_MAX_LENGTH_COMBINED];
|
||||
/* the number of combining characters. The base char is not counted. */
|
||||
|
@ -4,15 +4,18 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
|
||||
#define _XOPEN_SOURCE 500
|
||||
/* Our current implementation of combining characters requires
|
||||
* wcwidth(). Therefore the configure script should have disabled
|
||||
* CONFIG_COMBINE if wcwidth() doesn't exist. */
|
||||
#ifdef CONFIG_COMBINE
|
||||
#define _XOPEN_SOURCE 500 /* for wcwidth */
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#include <wchar.h>
|
||||
#endif
|
||||
|
||||
@ -489,7 +492,8 @@ good_char:
|
||||
if (data == UCS_NO_BREAK_SPACE
|
||||
&& html_context->options->wrap_nbsp)
|
||||
data = UCS_SPACE;
|
||||
#ifdef HAVE_WCWIDTH
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
if (wcwidth((wchar_t)data)) {
|
||||
if (document->combi_length) {
|
||||
if (document->comb_x != -1) {
|
||||
@ -521,7 +525,7 @@ good_char:
|
||||
part->char_width[x] = unicode_to_cell(data);
|
||||
schar->data = (unicode_val_T)data;
|
||||
}
|
||||
#ifdef HAVE_WCWIDTH
|
||||
#ifdef CONFIG_COMBINE
|
||||
document->comb_x = x;
|
||||
document->comb_y = y;
|
||||
#endif
|
||||
|
@ -770,8 +770,10 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
|
||||
++*string;
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_UTF8 */
|
||||
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
unicode_val_T last_combined = UCS_BEGIN_COMBINED - 1;
|
||||
unicode_val_T **combined;
|
||||
struct hash *combined_hash;
|
||||
@ -830,7 +832,8 @@ free_combined()
|
||||
mem_free(combined[i]);
|
||||
mem_free_if(combined);
|
||||
}
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#endif /* CONFIG_COMBINE */
|
||||
|
||||
|
||||
static void
|
||||
add_utf8(struct conv_table *ct, unicode_val_T u, const unsigned char *str)
|
||||
@ -1540,4 +1543,3 @@ is_cp_utf8(int cp_index)
|
||||
cp_index &= ~SYSTEM_CHARSET_FLAG;
|
||||
return is_cp_ptr_utf8(&codepages[cp_index]);
|
||||
}
|
||||
|
||||
|
@ -27,12 +27,14 @@ typedef uint32_t unicode_val_T;
|
||||
* for the second cell of a double-cell character. */
|
||||
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
#define UCS_END_COMBINED ((unicode_val_T) 0xFFFFFFFC)
|
||||
|
||||
#define UCS_BEGIN_COMBINED ((unicode_val_T) (UCS_END_COMBINED - (unicode_val_T) 10000))
|
||||
|
||||
/* Base character and up to 5 combining characters. */
|
||||
#define UCS_MAX_LENGTH_COMBINED 6
|
||||
#endif /* CONFIG_COMBINE */
|
||||
|
||||
/* If ELinks should display a double-cell character but there is only
|
||||
* one cell available, it displays this character instead. This must
|
||||
@ -154,15 +156,15 @@ unicode_val_T unicode_fold_label_case(unicode_val_T);
|
||||
inline int strlen_utf8(unsigned char **);
|
||||
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
|
||||
unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
|
||||
#endif /* CONFIG_UTF8 */
|
||||
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
extern unicode_val_T last_combined;
|
||||
extern unicode_val_T **combined;
|
||||
extern struct hash *combined_hash;
|
||||
unicode_val_T get_combined(unicode_val_T *, int);
|
||||
void free_combined();
|
||||
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#endif /* CONFIG_COMBINE */
|
||||
|
||||
unicode_val_T cp2u(int, unsigned char);
|
||||
const unsigned char *cp2utf8(int, int);
|
||||
|
@ -303,7 +303,7 @@ terminate_all_subsystems(void)
|
||||
done_options();
|
||||
done_event();
|
||||
terminate_osdep();
|
||||
#ifdef CONFIG_UTF8
|
||||
#ifdef CONFIG_COMBINE
|
||||
free_combined();
|
||||
#endif
|
||||
}
|
||||
|
@ -142,6 +142,9 @@ get_dyn_full_version(struct terminal *term, int more)
|
||||
#endif
|
||||
#ifdef CONFIG_UTF8
|
||||
comma, "UTF-8",
|
||||
#endif
|
||||
#ifdef CONFIG_COMBINE
|
||||
comma, _("Combining characters", term),
|
||||
#endif
|
||||
comma,
|
||||
(unsigned char *) NULL
|
||||
|
@ -223,10 +223,12 @@ struct screen_driver {
|
||||
* is the same as is_cp_utf8(charsets[0]), except the
|
||||
* latter might crash if UTF-8 I/O is disabled. */
|
||||
unsigned int utf8_cp:1;
|
||||
#endif /* CONFIG_UTF8 */
|
||||
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* Whether the terminal supports combining characters. */
|
||||
unsigned int combine:1;
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#endif /* CONFIG_COMBINE */
|
||||
} opt;
|
||||
|
||||
/* The terminal._template_ name. */
|
||||
@ -243,8 +245,10 @@ static const struct screen_driver_opt dumb_screen_driver_opt = {
|
||||
/* transparent: */ 1,
|
||||
#ifdef CONFIG_UTF8
|
||||
/* utf8_cp: */ 0,
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_COMBINE */
|
||||
};
|
||||
|
||||
/** Default options for ::TERM_VT100. */
|
||||
@ -257,8 +261,10 @@ static const struct screen_driver_opt vt100_screen_driver_opt = {
|
||||
/* transparent: */ 1,
|
||||
#ifdef CONFIG_UTF8
|
||||
/* utf8_cp: */ 0,
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_COMBINE */
|
||||
};
|
||||
|
||||
/** Default options for ::TERM_LINUX. */
|
||||
@ -271,8 +277,10 @@ static const struct screen_driver_opt linux_screen_driver_opt = {
|
||||
/* transparent: */ 1,
|
||||
#ifdef CONFIG_UTF8
|
||||
/* utf8_cp: */ 0,
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_COMBINE */
|
||||
};
|
||||
|
||||
/** Default options for ::TERM_KOI8. */
|
||||
@ -285,8 +293,10 @@ static const struct screen_driver_opt koi8_screen_driver_opt = {
|
||||
/* transparent: */ 1,
|
||||
#ifdef CONFIG_UTF8
|
||||
/* utf8_cp: */ 0,
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_COMBINE */
|
||||
};
|
||||
|
||||
/** Default options for ::TERM_FREEBSD. */
|
||||
@ -299,8 +309,10 @@ static const struct screen_driver_opt freebsd_screen_driver_opt = {
|
||||
/* transparent: */ 1,
|
||||
#ifdef CONFIG_UTF8
|
||||
/* utf8_cp: */ 0,
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_UTF8 */
|
||||
#ifdef CONFIG_COMBINE
|
||||
/* combine */ 0,
|
||||
#endif /* CONFIG_COMBINE */
|
||||
};
|
||||
|
||||
/** Default options for all the different types of terminals.
|
||||
@ -332,8 +344,10 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec)
|
||||
* function need not carefully restore options one by one. */
|
||||
copy_struct(&driver->opt, screen_driver_opts[driver->type]);
|
||||
|
||||
#ifdef CONFIG_UTF8
|
||||
#ifdef CONFIG_COMBINE
|
||||
driver->opt.combine = get_opt_bool_tree(term_spec, "combine", NULL);
|
||||
#endif /* CONFIG_COMBINE */
|
||||
#ifdef CONFIG_UTF8
|
||||
/* Force UTF-8 I/O if the UTF-8 charset is selected. Various
|
||||
* places assume that the terminal's charset is unibyte if
|
||||
* UTF-8 I/O is disabled. (bug 827) */
|
||||
@ -648,6 +662,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
|
||||
}
|
||||
if (data == UCS_NO_CHAR)
|
||||
return;
|
||||
#ifdef CONFIG_COMBINE
|
||||
if (data >= UCS_BEGIN_COMBINED && data <= last_combined) {
|
||||
unicode_val_T *text = combined[data - UCS_BEGIN_COMBINED];
|
||||
|
||||
@ -663,6 +678,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
|
||||
data = *text;
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_COMBINE */
|
||||
if (!isscreensafe_ucs(data))
|
||||
data = UCS_SPACE;
|
||||
add_to_string(screen, encode_utf8(data));
|
||||
|
Loading…
Reference in New Issue
Block a user