1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

Bug 824: Disable combining characters unless --enable-combining.

Label this as an experimental feature because it has so many bugs
and it is not clear how they can be fixed.
This commit is contained in:
Kalle Olavi Niemitalo 2008-01-19 19:56:50 +02:00 committed by Kalle Olavi Niemitalo
parent 14d1a0f3e2
commit f19c948ca7
12 changed files with 98 additions and 22 deletions

View File

@ -305,7 +305,7 @@ AC_CHECK_FUNCS(setenv putenv, HAVE_SETENV_OR_PUTENV=yes)
AC_CHECK_FUNCS(getuid, HAVE_GETUID=yes) AC_CHECK_FUNCS(getuid, HAVE_GETUID=yes)
AC_CHECK_FUNCS(geteuid, HAVE_GETEUID=yes) AC_CHECK_FUNCS(geteuid, HAVE_GETEUID=yes)
AC_CHECK_FUNCS(wcwidth) AC_CHECK_FUNCS(wcwidth, HAVE_WCWIDTH=yes)
dnl These aren't probably needed now, as they are commented in links.h. dnl These aren't probably needed now, as they are commented in links.h.
dnl I've no idea about their historical background, but I keep them here dnl I've no idea about their historical background, but I keep them here
@ -1338,6 +1338,9 @@ EL_ARG_ENABLE(CONFIG_SMALL, small, [Small binary],
EL_ARG_ENABLE(CONFIG_UTF8, utf-8, [UTF-8], EL_ARG_ENABLE(CONFIG_UTF8, utf-8, [UTF-8],
[ --disable-utf-8 disable UTF-8 support]) [ --disable-utf-8 disable UTF-8 support])
EL_ARG_DEPEND(CONFIG_COMBINE, combining, [CONFIG_UTF8:yes HAVE_WCWIDTH:yes], [Combining characters],
[ --enable-combining support Unicode combining characters (experimental)])
AC_ARG_ENABLE(weehoofooboomookerchoo, AC_ARG_ENABLE(weehoofooboomookerchoo,
[ [

View File

@ -621,7 +621,7 @@ CONFIG_SMALL=no
# support for double-width characters (like Japanese, etc.). # support for double-width characters (like Japanese, etc.).
# #
# Some features of Unicode are not handled at all. Combining characters is # Some features of Unicode are not handled at all. Combining characters is
# most visible absence. # most visible absence; but see CONFIG_COMBINE below.
# Some features are partially supported. Like line breaking between # Some features are partially supported. Like line breaking between
# double-width characters. There is no other detection for determining when to # double-width characters. There is no other detection for determining when to
# break or not. # break or not.
@ -633,6 +633,42 @@ CONFIG_SMALL=no
CONFIG_UTF8=yes CONFIG_UTF8=yes
### Unicode combining characters support
#
# Extends CONFIG_UTF8 with spotty support for combining characters
# such as U+0303 COMBINING TILDE.
#
# This feature is experimental; it is tracked as enhancement request 824.
# Known bugs and weaknesses:
#
# - It assumes wcwidth(wc)==0 means wc is a combining character.
# However, wcwidth also returns 0 for various control characters
# (e.g. U+200E LEFT-TO-RIGHT MARK), and apparently returns -1 if
# LC_CTYPE does not support the wide character. Besides, wchar_t
# might not be Unicode at all. ELinks should instead use Unicode
# character properties, perhaps via ICU.
#
# - It assumes all combining characters are nonspacing.
#
# - It works only if the terminal is using the UTF-8 charset.
#
# - It allocates an internal code for each different combining
# character sequence. A malicious web page could easily use up all
# the available codes, and the ELinks process would thenceforth be
# unable to display any new sequences.
#
# - It does not understand canonical equivalences.
#
# - Combining characters work only in HTML text. They do not work in
# HTML forms, HTML links, HTML document titles, plain text, menus,
# dialog boxes, or keymaps.
#
# - Combining characters at the end of the document do not take effect.
#
# Default: disabled
CONFIG_COMBINE=no
### Back-trace Printing ### Back-trace Printing
# #

View File

@ -864,10 +864,12 @@ static struct option_info config_options_info[] = {
"only the subset of UTF-8 according to terminal codepage is used.\n" "only the subset of UTF-8 according to terminal codepage is used.\n"
"ELinks ignores this option if the terminal codepage is UTF-8.")), "ELinks ignores this option if the terminal codepage is UTF-8.")),
#ifdef CONFIG_COMBINE
INIT_OPT_BOOL("terminal._template_", N_("Combining characters"), INIT_OPT_BOOL("terminal._template_", N_("Combining characters"),
"combine", 0, 0, "combine", 0, 0,
N_("Enable combining characters. It works only with " N_("Enable combining characters. It works only with "
"the xterm in UTF-8 mode.")), "the xterm in UTF-8 mode.")),
#endif
INIT_OPT_BOOL("terminal._template_", N_("Restrict frames in cp850/852"), INIT_OPT_BOOL("terminal._template_", N_("Restrict frames in cp850/852"),
"restrict_852", 0, 0, "restrict_852", 0, 0,

View File

@ -91,7 +91,9 @@ enum termopt {
TERM_OPT_UTF_8_IO, TERM_OPT_UTF_8_IO,
TERM_OPT_TRANSPARENCY, TERM_OPT_TRANSPARENCY,
TERM_OPT_UNDERLINE, TERM_OPT_UNDERLINE,
#ifdef CONFIG_COMBINE
TERM_OPT_COMBINE, TERM_OPT_COMBINE,
#endif
TERM_OPTIONS, TERM_OPTIONS,
}; };
@ -105,7 +107,9 @@ static struct option_resolver resolvers[] = {
{ TERM_OPT_TRANSPARENCY, "transparency" }, { TERM_OPT_TRANSPARENCY, "transparency" },
{ TERM_OPT_UTF_8_IO, "utf_8_io" }, { TERM_OPT_UTF_8_IO, "utf_8_io" },
{ TERM_OPT_UNDERLINE, "underline" }, { TERM_OPT_UNDERLINE, "underline" },
#ifdef CONFIG_COMBINE
{ TERM_OPT_COMBINE, "combine" }, { TERM_OPT_COMBINE, "combine" },
#endif
}; };
static widget_handler_status_T static widget_handler_status_T
@ -151,7 +155,7 @@ push_save_button(struct dialog_data *dlg_data, struct widget_data *button)
#define RADIO_TRUE 0 #define RADIO_TRUE 0
#endif #endif
#define TERMOPT_WIDGETS_COUNT (20 + RADIO_88 + RADIO_256 + RADIO_TRUE) #define TERMOPT_WIDGETS_COUNT (12 + TERM_OPTIONS + RADIO_88 + RADIO_256 + RADIO_TRUE)
#define TERM_OPTION_VALUE_SIZE (sizeof(union option_value) * TERM_OPTIONS) #define TERM_OPTION_VALUE_SIZE (sizeof(union option_value) * TERM_OPTIONS)
@ -232,7 +236,9 @@ terminal_options(struct terminal *term, void *xxx, struct session *ses)
add_dlg_checkbox(dlg, _("Transparency", term), &values[TERM_OPT_TRANSPARENCY].number); add_dlg_checkbox(dlg, _("Transparency", term), &values[TERM_OPT_TRANSPARENCY].number);
add_dlg_checkbox(dlg, _("Underline", term), &values[TERM_OPT_UNDERLINE].number); add_dlg_checkbox(dlg, _("Underline", term), &values[TERM_OPT_UNDERLINE].number);
add_dlg_checkbox(dlg, _("UTF-8 I/O", term), &values[TERM_OPT_UTF_8_IO].number); add_dlg_checkbox(dlg, _("UTF-8 I/O", term), &values[TERM_OPT_UTF_8_IO].number);
#ifdef CONFIG_COMBINE
add_dlg_checkbox(dlg, _("Combining characters", term), &values[TERM_OPT_COMBINE].number); add_dlg_checkbox(dlg, _("Combining characters", term), &values[TERM_OPT_COMBINE].number);
#endif
add_dlg_button(dlg, _("~OK", term), B_ENTER, push_ok_button, NULL); add_dlg_button(dlg, _("~OK", term), B_ENTER, push_ok_button, NULL);
if (!anonymous) if (!anonymous)

View File

@ -57,7 +57,7 @@ init_document(struct cache_entry *cached, struct document_options *options)
init_list(document->onload_snippets); init_list(document->onload_snippets);
#endif #endif
#ifdef CONFIG_UTF8 #ifdef CONFIG_COMBINE
document->comb_x = -1; document->comb_x = -1;
document->comb_y = -1; document->comb_y = -1;
#endif #endif

View File

@ -201,6 +201,8 @@ struct document {
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
unsigned char buf[7]; unsigned char buf[7];
unsigned char buf_length; unsigned char buf_length;
#endif
#ifdef CONFIG_COMBINE
/* base char + 5 combining chars = 6 */ /* base char + 5 combining chars = 6 */
unicode_val_T combi[UCS_MAX_LENGTH_COMBINED]; unicode_val_T combi[UCS_MAX_LENGTH_COMBINED];
/* the number of combining characters. The base char is not counted. */ /* the number of combining characters. The base char is not counted. */

View File

@ -4,15 +4,18 @@
#include "config.h" #include "config.h"
#endif #endif
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH) /* Our current implementation of combining characters requires
#define _XOPEN_SOURCE 500 * wcwidth(). Therefore the configure script should have disabled
* CONFIG_COMBINE if wcwidth() doesn't exist. */
#ifdef CONFIG_COMBINE
#define _XOPEN_SOURCE 500 /* for wcwidth */
#endif #endif
#include <ctype.h> #include <ctype.h>
#include <stdarg.h> #include <stdarg.h>
#include <string.h> #include <string.h>
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH) #ifdef HAVE_WCHAR_H
#include <wchar.h> #include <wchar.h>
#endif #endif
@ -489,7 +492,8 @@ good_char:
if (data == UCS_NO_BREAK_SPACE if (data == UCS_NO_BREAK_SPACE
&& html_context->options->wrap_nbsp) && html_context->options->wrap_nbsp)
data = UCS_SPACE; data = UCS_SPACE;
#ifdef HAVE_WCWIDTH
#ifdef CONFIG_COMBINE
if (wcwidth((wchar_t)data)) { if (wcwidth((wchar_t)data)) {
if (document->combi_length) { if (document->combi_length) {
if (document->comb_x != -1) { if (document->comb_x != -1) {
@ -521,7 +525,7 @@ good_char:
part->char_width[x] = unicode_to_cell(data); part->char_width[x] = unicode_to_cell(data);
schar->data = (unicode_val_T)data; schar->data = (unicode_val_T)data;
} }
#ifdef HAVE_WCWIDTH #ifdef CONFIG_COMBINE
document->comb_x = x; document->comb_x = x;
document->comb_y = y; document->comb_y = y;
#endif #endif

View File

@ -770,8 +770,10 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
++*string; ++*string;
return ret; return ret;
} }
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
unicode_val_T last_combined = UCS_BEGIN_COMBINED - 1; unicode_val_T last_combined = UCS_BEGIN_COMBINED - 1;
unicode_val_T **combined; unicode_val_T **combined;
struct hash *combined_hash; struct hash *combined_hash;
@ -830,7 +832,8 @@ free_combined()
mem_free(combined[i]); mem_free(combined[i]);
mem_free_if(combined); mem_free_if(combined);
} }
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_COMBINE */
static void static void
add_utf8(struct conv_table *ct, unicode_val_T u, const unsigned char *str) add_utf8(struct conv_table *ct, unicode_val_T u, const unsigned char *str)
@ -1540,4 +1543,3 @@ is_cp_utf8(int cp_index)
cp_index &= ~SYSTEM_CHARSET_FLAG; cp_index &= ~SYSTEM_CHARSET_FLAG;
return is_cp_ptr_utf8(&codepages[cp_index]); return is_cp_ptr_utf8(&codepages[cp_index]);
} }

View File

@ -27,12 +27,14 @@ typedef uint32_t unicode_val_T;
* for the second cell of a double-cell character. */ * for the second cell of a double-cell character. */
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD) #define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
#ifdef CONFIG_COMBINE
#define UCS_END_COMBINED ((unicode_val_T) 0xFFFFFFFC) #define UCS_END_COMBINED ((unicode_val_T) 0xFFFFFFFC)
#define UCS_BEGIN_COMBINED ((unicode_val_T) (UCS_END_COMBINED - (unicode_val_T) 10000)) #define UCS_BEGIN_COMBINED ((unicode_val_T) (UCS_END_COMBINED - (unicode_val_T) 10000))
/* Base character and up to 5 combining characters. */ /* Base character and up to 5 combining characters. */
#define UCS_MAX_LENGTH_COMBINED 6 #define UCS_MAX_LENGTH_COMBINED 6
#endif /* CONFIG_COMBINE */
/* If ELinks should display a double-cell character but there is only /* If ELinks should display a double-cell character but there is only
* one cell available, it displays this character instead. This must * one cell available, it displays this character instead. This must
@ -154,15 +156,15 @@ unicode_val_T unicode_fold_label_case(unicode_val_T);
inline int strlen_utf8(unsigned char **); inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *); inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *); unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
extern unicode_val_T last_combined; extern unicode_val_T last_combined;
extern unicode_val_T **combined; extern unicode_val_T **combined;
extern struct hash *combined_hash; extern struct hash *combined_hash;
unicode_val_T get_combined(unicode_val_T *, int); unicode_val_T get_combined(unicode_val_T *, int);
void free_combined(); void free_combined();
#endif /* CONFIG_COMBINE */
#endif /* CONFIG_UTF8 */
unicode_val_T cp2u(int, unsigned char); unicode_val_T cp2u(int, unsigned char);
const unsigned char *cp2utf8(int, int); const unsigned char *cp2utf8(int, int);

View File

@ -303,7 +303,7 @@ terminate_all_subsystems(void)
done_options(); done_options();
done_event(); done_event();
terminate_osdep(); terminate_osdep();
#ifdef CONFIG_UTF8 #ifdef CONFIG_COMBINE
free_combined(); free_combined();
#endif #endif
} }

View File

@ -142,6 +142,9 @@ get_dyn_full_version(struct terminal *term, int more)
#endif #endif
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
comma, "UTF-8", comma, "UTF-8",
#endif
#ifdef CONFIG_COMBINE
comma, _("Combining characters", term),
#endif #endif
comma, comma,
(unsigned char *) NULL (unsigned char *) NULL

View File

@ -223,10 +223,12 @@ struct screen_driver {
* is the same as is_cp_utf8(charsets[0]), except the * is the same as is_cp_utf8(charsets[0]), except the
* latter might crash if UTF-8 I/O is disabled. */ * latter might crash if UTF-8 I/O is disabled. */
unsigned int utf8_cp:1; unsigned int utf8_cp:1;
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* Whether the terminal supports combining characters. */ /* Whether the terminal supports combining characters. */
unsigned int combine:1; unsigned int combine:1;
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_COMBINE */
} opt; } opt;
/* The terminal._template_ name. */ /* The terminal._template_ name. */
@ -243,8 +245,10 @@ static const struct screen_driver_opt dumb_screen_driver_opt = {
/* transparent: */ 1, /* transparent: */ 1,
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
/* utf8_cp: */ 0, /* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
}; };
/** Default options for ::TERM_VT100. */ /** Default options for ::TERM_VT100. */
@ -257,8 +261,10 @@ static const struct screen_driver_opt vt100_screen_driver_opt = {
/* transparent: */ 1, /* transparent: */ 1,
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
/* utf8_cp: */ 0, /* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
}; };
/** Default options for ::TERM_LINUX. */ /** Default options for ::TERM_LINUX. */
@ -271,8 +277,10 @@ static const struct screen_driver_opt linux_screen_driver_opt = {
/* transparent: */ 1, /* transparent: */ 1,
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
/* utf8_cp: */ 0, /* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
}; };
/** Default options for ::TERM_KOI8. */ /** Default options for ::TERM_KOI8. */
@ -285,8 +293,10 @@ static const struct screen_driver_opt koi8_screen_driver_opt = {
/* transparent: */ 1, /* transparent: */ 1,
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
/* utf8_cp: */ 0, /* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
}; };
/** Default options for ::TERM_FREEBSD. */ /** Default options for ::TERM_FREEBSD. */
@ -299,8 +309,10 @@ static const struct screen_driver_opt freebsd_screen_driver_opt = {
/* transparent: */ 1, /* transparent: */ 1,
#ifdef CONFIG_UTF8 #ifdef CONFIG_UTF8
/* utf8_cp: */ 0, /* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
}; };
/** Default options for all the different types of terminals. /** Default options for all the different types of terminals.
@ -332,8 +344,10 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec)
* function need not carefully restore options one by one. */ * function need not carefully restore options one by one. */
copy_struct(&driver->opt, screen_driver_opts[driver->type]); copy_struct(&driver->opt, screen_driver_opts[driver->type]);
#ifdef CONFIG_UTF8 #ifdef CONFIG_COMBINE
driver->opt.combine = get_opt_bool_tree(term_spec, "combine", NULL); driver->opt.combine = get_opt_bool_tree(term_spec, "combine", NULL);
#endif /* CONFIG_COMBINE */
#ifdef CONFIG_UTF8
/* Force UTF-8 I/O if the UTF-8 charset is selected. Various /* Force UTF-8 I/O if the UTF-8 charset is selected. Various
* places assume that the terminal's charset is unibyte if * places assume that the terminal's charset is unibyte if
* UTF-8 I/O is disabled. (bug 827) */ * UTF-8 I/O is disabled. (bug 827) */
@ -648,6 +662,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
} }
if (data == UCS_NO_CHAR) if (data == UCS_NO_CHAR)
return; return;
#ifdef CONFIG_COMBINE
if (data >= UCS_BEGIN_COMBINED && data <= last_combined) { if (data >= UCS_BEGIN_COMBINED && data <= last_combined) {
unicode_val_T *text = combined[data - UCS_BEGIN_COMBINED]; unicode_val_T *text = combined[data - UCS_BEGIN_COMBINED];
@ -663,6 +678,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
data = *text; data = *text;
} }
} }
#endif /* CONFIG_COMBINE */
if (!isscreensafe_ucs(data)) if (!isscreensafe_ucs(data))
data = UCS_SPACE; data = UCS_SPACE;
add_to_string(screen, encode_utf8(data)); add_to_string(screen, encode_utf8(data));