1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-06-16 23:45:30 +00:00

Bug 824: Disable combining characters unless --enable-combining.

Label this as an experimental feature because it has so many bugs
and it is not clear how they can be fixed.
This commit is contained in:
Kalle Olavi Niemitalo 2008-01-19 19:56:50 +02:00 committed by Kalle Olavi Niemitalo
parent 14d1a0f3e2
commit f19c948ca7
12 changed files with 98 additions and 22 deletions

View File

@ -305,7 +305,7 @@ AC_CHECK_FUNCS(setenv putenv, HAVE_SETENV_OR_PUTENV=yes)
AC_CHECK_FUNCS(getuid, HAVE_GETUID=yes)
AC_CHECK_FUNCS(geteuid, HAVE_GETEUID=yes)
AC_CHECK_FUNCS(wcwidth)
AC_CHECK_FUNCS(wcwidth, HAVE_WCWIDTH=yes)
dnl These probably aren't needed now, as they are commented out in links.h.
dnl I've no idea about their historical background, but I keep them here
@ -1338,6 +1338,9 @@ EL_ARG_ENABLE(CONFIG_SMALL, small, [Small binary],
EL_ARG_ENABLE(CONFIG_UTF8, utf-8, [UTF-8],
[ --disable-utf-8 disable UTF-8 support])
EL_ARG_DEPEND(CONFIG_COMBINE, combining, [CONFIG_UTF8:yes HAVE_WCWIDTH:yes], [Combining characters],
[ --enable-combining support Unicode combining characters (experimental)])
AC_ARG_ENABLE(weehoofooboomookerchoo,
[

View File

@ -621,7 +621,7 @@ CONFIG_SMALL=no
# support for double-width characters (like Japanese, etc.).
#
# Some features of Unicode are not handled at all. Combining characters are the
# most visible absence.
# most visible absence; but see CONFIG_COMBINE below.
# Some features are only partially supported, for example line breaking between
# double-width characters; there is no other detection for determining when to
# break or not.
@ -633,6 +633,42 @@ CONFIG_SMALL=no
CONFIG_UTF8=yes
### Unicode combining characters support
#
# Extends CONFIG_UTF8 with spotty support for combining characters
# such as U+0303 COMBINING TILDE.
#
# This feature is experimental and has been filed as enhancement 824.
# Known bugs and weaknesses:
#
# - It assumes wcwidth(wc)==0 means wc is a combining character.
# However, wcwidth also returns 0 for various control characters
# (e.g. U+200E LEFT-TO-RIGHT MARK), and apparently returns -1 if
# LC_CTYPE does not support the wide character. Besides, wchar_t
# might not be Unicode at all. ELinks should instead use Unicode
# character properties, perhaps via ICU.
#
# - It assumes all combining characters are nonspacing.
#
# - It works only if the terminal is using the UTF-8 charset.
#
# - It allocates an internal code for each different combining
# character sequence. A malicious web page could easily use up all
# the available codes, and the ELinks process would thenceforth be
# unable to display any new sequences.
#
# - It does not understand canonical equivalences.
#
# - Combining characters work only in HTML text. They do not work in
# HTML forms, HTML links, HTML document titles, plain text, menus,
# dialog boxes, or keymaps.
#
# - Combining characters at the end of the document do not take effect.
#
# Default: disabled
CONFIG_COMBINE=no
### Back-trace Printing
#

View File

@ -864,10 +864,12 @@ static struct option_info config_options_info[] = {
"only the subset of UTF-8 according to terminal codepage is used.\n"
"ELinks ignores this option if the terminal codepage is UTF-8.")),
#ifdef CONFIG_COMBINE
INIT_OPT_BOOL("terminal._template_", N_("Combining characters"),
"combine", 0, 0,
N_("Enable combining characters. It works only with "
"the xterm in UTF-8 mode.")),
#endif
INIT_OPT_BOOL("terminal._template_", N_("Restrict frames in cp850/852"),
"restrict_852", 0, 0,

View File

@ -91,7 +91,9 @@ enum termopt {
TERM_OPT_UTF_8_IO,
TERM_OPT_TRANSPARENCY,
TERM_OPT_UNDERLINE,
#ifdef CONFIG_COMBINE
TERM_OPT_COMBINE,
#endif
TERM_OPTIONS,
};
@ -105,7 +107,9 @@ static struct option_resolver resolvers[] = {
{ TERM_OPT_TRANSPARENCY, "transparency" },
{ TERM_OPT_UTF_8_IO, "utf_8_io" },
{ TERM_OPT_UNDERLINE, "underline" },
#ifdef CONFIG_COMBINE
{ TERM_OPT_COMBINE, "combine" },
#endif
};
static widget_handler_status_T
@ -151,7 +155,7 @@ push_save_button(struct dialog_data *dlg_data, struct widget_data *button)
#define RADIO_TRUE 0
#endif
#define TERMOPT_WIDGETS_COUNT (20 + RADIO_88 + RADIO_256 + RADIO_TRUE)
#define TERMOPT_WIDGETS_COUNT (12 + TERM_OPTIONS + RADIO_88 + RADIO_256 + RADIO_TRUE)
#define TERM_OPTION_VALUE_SIZE (sizeof(union option_value) * TERM_OPTIONS)
@ -232,7 +236,9 @@ terminal_options(struct terminal *term, void *xxx, struct session *ses)
add_dlg_checkbox(dlg, _("Transparency", term), &values[TERM_OPT_TRANSPARENCY].number);
add_dlg_checkbox(dlg, _("Underline", term), &values[TERM_OPT_UNDERLINE].number);
add_dlg_checkbox(dlg, _("UTF-8 I/O", term), &values[TERM_OPT_UTF_8_IO].number);
#ifdef CONFIG_COMBINE
add_dlg_checkbox(dlg, _("Combining characters", term), &values[TERM_OPT_COMBINE].number);
#endif
add_dlg_button(dlg, _("~OK", term), B_ENTER, push_ok_button, NULL);
if (!anonymous)

View File

@ -57,7 +57,7 @@ init_document(struct cache_entry *cached, struct document_options *options)
init_list(document->onload_snippets);
#endif
#ifdef CONFIG_UTF8
#ifdef CONFIG_COMBINE
document->comb_x = -1;
document->comb_y = -1;
#endif

View File

@ -201,6 +201,8 @@ struct document {
#ifdef CONFIG_UTF8
unsigned char buf[7];
unsigned char buf_length;
#endif
#ifdef CONFIG_COMBINE
/* base char + 5 combining chars = 6 */
unicode_val_T combi[UCS_MAX_LENGTH_COMBINED];
/* the number of combining characters. The base char is not counted. */

View File

@ -4,15 +4,18 @@
#include "config.h"
#endif
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
#define _XOPEN_SOURCE 500
/* Our current implementation of combining characters requires
* wcwidth(). Therefore the configure script should have disabled
* CONFIG_COMBINE if wcwidth() doesn't exist. */
#ifdef CONFIG_COMBINE
#define _XOPEN_SOURCE 500 /* for wcwidth */
#endif
#include <ctype.h>
#include <stdarg.h>
#include <string.h>
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
@ -489,7 +492,8 @@ good_char:
if (data == UCS_NO_BREAK_SPACE
&& html_context->options->wrap_nbsp)
data = UCS_SPACE;
#ifdef HAVE_WCWIDTH
#ifdef CONFIG_COMBINE
if (wcwidth((wchar_t)data)) {
if (document->combi_length) {
if (document->comb_x != -1) {
@ -521,7 +525,7 @@ good_char:
part->char_width[x] = unicode_to_cell(data);
schar->data = (unicode_val_T)data;
}
#ifdef HAVE_WCWIDTH
#ifdef CONFIG_COMBINE
document->comb_x = x;
document->comb_y = y;
#endif

View File

@ -770,8 +770,10 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
++*string;
return ret;
}
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
unicode_val_T last_combined = UCS_BEGIN_COMBINED - 1;
unicode_val_T **combined;
struct hash *combined_hash;
@ -830,7 +832,8 @@ free_combined()
mem_free(combined[i]);
mem_free_if(combined);
}
#endif /* CONFIG_UTF8 */
#endif /* CONFIG_COMBINE */
static void
add_utf8(struct conv_table *ct, unicode_val_T u, const unsigned char *str)
@ -1540,4 +1543,3 @@ is_cp_utf8(int cp_index)
cp_index &= ~SYSTEM_CHARSET_FLAG;
return is_cp_ptr_utf8(&codepages[cp_index]);
}

View File

@ -27,12 +27,14 @@ typedef uint32_t unicode_val_T;
* for the second cell of a double-cell character. */
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
#ifdef CONFIG_COMBINE
#define UCS_END_COMBINED ((unicode_val_T) 0xFFFFFFFC)
#define UCS_BEGIN_COMBINED ((unicode_val_T) (UCS_END_COMBINED - (unicode_val_T) 10000))
/* Base character and up to 5 combining characters. */
#define UCS_MAX_LENGTH_COMBINED 6
#endif /* CONFIG_COMBINE */
/* If ELinks should display a double-cell character but there is only
* one cell available, it displays this character instead. This must
@ -154,15 +156,15 @@ unicode_val_T unicode_fold_label_case(unicode_val_T);
inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
extern unicode_val_T last_combined;
extern unicode_val_T **combined;
extern struct hash *combined_hash;
unicode_val_T get_combined(unicode_val_T *, int);
void free_combined();
#endif /* CONFIG_UTF8 */
#endif /* CONFIG_COMBINE */
unicode_val_T cp2u(int, unsigned char);
const unsigned char *cp2utf8(int, int);

View File

@ -303,7 +303,7 @@ terminate_all_subsystems(void)
done_options();
done_event();
terminate_osdep();
#ifdef CONFIG_UTF8
#ifdef CONFIG_COMBINE
free_combined();
#endif
}

View File

@ -142,6 +142,9 @@ get_dyn_full_version(struct terminal *term, int more)
#endif
#ifdef CONFIG_UTF8
comma, "UTF-8",
#endif
#ifdef CONFIG_COMBINE
comma, _("Combining characters", term),
#endif
comma,
(unsigned char *) NULL

View File

@ -223,10 +223,12 @@ struct screen_driver {
* is the same as is_cp_utf8(charsets[0]), except the
* latter might crash if UTF-8 I/O is disabled. */
unsigned int utf8_cp:1;
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* Whether the terminal supports combining characters. */
unsigned int combine:1;
#endif /* CONFIG_UTF8 */
#endif /* CONFIG_COMBINE */
} opt;
/* The terminal._template_ name. */
@ -243,8 +245,10 @@ static const struct screen_driver_opt dumb_screen_driver_opt = {
/* transparent: */ 1,
#ifdef CONFIG_UTF8
/* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
};
/** Default options for ::TERM_VT100. */
@ -257,8 +261,10 @@ static const struct screen_driver_opt vt100_screen_driver_opt = {
/* transparent: */ 1,
#ifdef CONFIG_UTF8
/* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
};
/** Default options for ::TERM_LINUX. */
@ -271,8 +277,10 @@ static const struct screen_driver_opt linux_screen_driver_opt = {
/* transparent: */ 1,
#ifdef CONFIG_UTF8
/* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
};
/** Default options for ::TERM_KOI8. */
@ -285,8 +293,10 @@ static const struct screen_driver_opt koi8_screen_driver_opt = {
/* transparent: */ 1,
#ifdef CONFIG_UTF8
/* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
};
/** Default options for ::TERM_FREEBSD. */
@ -299,8 +309,10 @@ static const struct screen_driver_opt freebsd_screen_driver_opt = {
/* transparent: */ 1,
#ifdef CONFIG_UTF8
/* utf8_cp: */ 0,
/* combine */ 0,
#endif /* CONFIG_UTF8 */
#ifdef CONFIG_COMBINE
/* combine */ 0,
#endif /* CONFIG_COMBINE */
};
/** Default options for all the different types of terminals.
@ -332,8 +344,10 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec)
* function need not carefully restore options one by one. */
copy_struct(&driver->opt, screen_driver_opts[driver->type]);
#ifdef CONFIG_UTF8
#ifdef CONFIG_COMBINE
driver->opt.combine = get_opt_bool_tree(term_spec, "combine", NULL);
#endif /* CONFIG_COMBINE */
#ifdef CONFIG_UTF8
/* Force UTF-8 I/O if the UTF-8 charset is selected. Various
* places assume that the terminal's charset is unibyte if
* UTF-8 I/O is disabled. (bug 827) */
@ -648,6 +662,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
}
if (data == UCS_NO_CHAR)
return;
#ifdef CONFIG_COMBINE
if (data >= UCS_BEGIN_COMBINED && data <= last_combined) {
unicode_val_T *text = combined[data - UCS_BEGIN_COMBINED];
@ -663,6 +678,7 @@ add_char_data(struct string *screen, struct screen_driver *driver,
data = *text;
}
}
#endif /* CONFIG_COMBINE */
if (!isscreensafe_ucs(data))
data = UCS_SPACE;
add_to_string(screen, encode_utf8(data));