diff --git a/src/config/options.inc b/src/config/options.inc
index f01be47b..61211142 100644
--- a/src/config/options.inc
+++ b/src/config/options.inc
@@ -864,6 +864,11 @@ static struct option_info config_options_info[] = {
 		"only the subset of UTF-8 according to terminal codepage is used.\n"
 		"ELinks ignores this option if the terminal codepage is UTF-8.")),
 
+	INIT_OPT_BOOL("terminal._template_", N_("Combining characters"),
+		"combine", 0, 0,
+		N_("Enable combining characters. It works only with "
+		"the xterm in UTF-8 mode.")),
+
 	INIT_OPT_BOOL("terminal._template_", N_("Restrict frames in cp850/852"),
 		"restrict_852", 0, 0,
 		N_("Restrict the characters used when drawing lines. Makes sense\n"
diff --git a/src/dialogs/options.c b/src/dialogs/options.c
index 20cf53cf..ebb343bd 100644
--- a/src/dialogs/options.c
+++ b/src/dialogs/options.c
@@ -91,6 +91,7 @@ enum termopt {
 	TERM_OPT_UTF_8_IO,
 	TERM_OPT_TRANSPARENCY,
 	TERM_OPT_UNDERLINE,
+	TERM_OPT_COMBINE,
 
 	TERM_OPTIONS,
 };
@@ -104,6 +105,7 @@ static struct option_resolver resolvers[] = {
 	{ TERM_OPT_TRANSPARENCY, "transparency" },
 	{ TERM_OPT_UTF_8_IO, "utf_8_io" },
 	{ TERM_OPT_UNDERLINE, "underline" },
+	{ TERM_OPT_COMBINE, "combine" },
 };
 
 static widget_handler_status_T
@@ -149,7 +151,7 @@ push_save_button(struct dialog_data *dlg_data, struct widget_data *button)
 #define RADIO_TRUE 0
 #endif
 
-#define TERMOPT_WIDGETS_COUNT (19 + RADIO_88 + RADIO_256 + RADIO_TRUE)
+#define TERMOPT_WIDGETS_COUNT (20 + RADIO_88 + RADIO_256 + RADIO_TRUE)
 
 #define TERM_OPTION_VALUE_SIZE (sizeof(union option_value) * TERM_OPTIONS)
 
@@ -230,6 +232,7 @@ terminal_options(struct terminal *term, void *xxx, struct session *ses)
 	add_dlg_checkbox(dlg, _("Transparency", term), &values[TERM_OPT_TRANSPARENCY].number);
 	add_dlg_checkbox(dlg, _("Underline", term), &values[TERM_OPT_UNDERLINE].number);
 	add_dlg_checkbox(dlg, _("UTF-8 I/O", term), &values[TERM_OPT_UTF_8_IO].number);
+	add_dlg_checkbox(dlg, _("Combining characters", term), &values[TERM_OPT_COMBINE].number);
 
 	add_dlg_button(dlg, _("~OK", term), B_ENTER, push_ok_button, NULL);
 	if (!anonymous)
diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index 736485cd..fb1a0b39 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -25,6 +25,7 @@
 #include "util/conv.h"
 #include "util/error.h"
 #include "util/fastfind.h"
+#include "util/hash.h"
 #include "util/memory.h"
 #include "util/string.h"
 
@@ -769,9 +770,90 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
 	++*string;
 	return ret;
 }
-#endif /* CONFIG_UTF8 */
 
 
+/* Code of the most recently allocated combined character, or
+ * UCS_BEGIN_COMBINED - 1 while none has been allocated yet. */
+unicode_val_T last_combined = UCS_BEGIN_COMBINED - 1;
+
+/* combined[code - UCS_BEGIN_COMBINED] is the sequence that code stands
+ * for: the characters given to get_combined(), then UCS_END_COMBINED. */
+unicode_val_T **combined;
+
+/* Maps the bytes of a sequence (terminator excluded) to its code. */
+struct hash *combined_hash;
+
+/* Return the code that stands for the @length characters at @data,
+ * allocating the next free code when the sequence is not registered
+ * yet.  Return UCS_NO_CHAR if @length is out of range, if all codes up
+ * to UCS_END_COMBINED are taken or if an allocation fails; a failed
+ * call leaves @combined and @last_combined untouched so that
+ * free_combined() and readers of the table can keep using them. */
+unicode_val_T
+get_combined(unicode_val_T *data, int length)
+{
+	struct hash_item *item;
+	unicode_val_T **table;
+	unicode_val_T *key;
+	int i, indeks;
+
+	assert(length >= 1 && length <= UCS_MAX_LENGTH_COMBINED);
+	if_assert_failed return UCS_NO_CHAR;
+
+	if (!combined_hash) combined_hash = init_hash8();
+	if (!combined_hash) return UCS_NO_CHAR;
+	item = get_hash_item(combined_hash, (unsigned char *)data, length * sizeof(*data));
+
+	if (item) return (unicode_val_T)(long)item->value;
+	if (last_combined >= UCS_END_COMBINED) return UCS_NO_CHAR;
+
+	key = mem_alloc((length + 1) * sizeof(*key));
+	if (!key) return UCS_NO_CHAR;
+	for (i = 0; i < length; i++)
+		key[i] = data[i];
+	key[i] = UCS_END_COMBINED;
+
+	indeks = last_combined + 1 - UCS_BEGIN_COMBINED;
+
+	/* Grow through a temporary: assigning a NULL result straight to
+	 * @combined would drop the only reference to the existing table
+	 * and make free_combined() dereference NULL. */
+	table = mem_realloc(combined, sizeof(*combined) * (indeks + 1));
+	if (!table) {
+		mem_free(key);
+		return UCS_NO_CHAR;
+	}
+	combined = table;
+
+	item = add_hash_item(combined_hash, (unsigned char *)key,
+			     length * sizeof(*data), (void *)(long)(last_combined + 1));
+	if (!item) {
+		mem_free(key);
+		return UCS_NO_CHAR;
+	}
+
+	/* Publish the new code only once nothing can fail any more. */
+	combined[indeks] = key;
+	last_combined++;
+	return last_combined;
+}
+
+/* Release every registered sequence together with the lookup hash
+ * and return to the initial empty state. */
+void
+free_combined(void)
+{
+	int i, end = last_combined - UCS_BEGIN_COMBINED + 1;
+
+	if (combined_hash)
+		free_hash(&combined_hash);
+	for (i = 0; i < end; i++)
+		mem_free(combined[i]);
+	mem_free_if(combined);
+	combined = NULL;
+	last_combined = UCS_BEGIN_COMBINED - 1;
+}
+#endif /* CONFIG_UTF8 */
+
 static void
 add_utf8(struct conv_table *ct, unicode_val_T u, const unsigned char *str)
 {
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 75e6f843..8eb06fca 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -1,6 +1,7 @@
 #ifndef EL__INTL_CHARSETS_H
 #define EL__INTL_CHARSETS_H
 
+struct hash;
 typedef uint32_t unicode_val_T;
 
 /* U+0020 SPACE. Normally the same as ' ' or L' ' but perhaps ELinks
@@ -26,6 +27,13 @@ typedef uint32_t unicode_val_T;
  * for the second cell of a double-cell character. */
 #define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
 
+#define UCS_END_COMBINED ((unicode_val_T) 0xFFFFFFFC)
+
+#define UCS_BEGIN_COMBINED ((unicode_val_T) (UCS_END_COMBINED - (unicode_val_T) 10000))
+
+/* Base character and up to 5 combining characters. */
+#define UCS_MAX_LENGTH_COMBINED 6
+
 /* If ELinks should display a double-cell character but there is only
  * one cell available, it displays this character instead. This must
  * be a single-cell character but need not be unique. Possible values
@@ -146,6 +154,14 @@ unicode_val_T unicode_fold_label_case(unicode_val_T);
 inline int strlen_utf8(unsigned char **);
 inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
 unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
+
+
+extern unicode_val_T last_combined;
+extern unicode_val_T **combined;
+extern struct hash *combined_hash;
+unicode_val_T get_combined(unicode_val_T *, int);
+void free_combined(void);
+
 #endif /* CONFIG_UTF8 */
 
 unicode_val_T cp2u(int, unsigned char);
diff --git a/src/main/main.c b/src/main/main.c
index 8b4fe44d..cdea245b 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -49,6 +49,7 @@
 #include "util/color.h"
 #include "util/error.h"
 #include "util/file.h"
+#include "util/hash.h"
 #include "util/memdebug.h"
 #include "util/memory.h"
 #include "viewer/dump/dump.h"
@@ -302,6 +303,9 @@ terminate_all_subsystems(void)
 	done_options();
 	done_event();
 	terminate_osdep();
+#ifdef CONFIG_UTF8
+	free_combined();
+#endif
 }
 
 void
diff --git a/src/terminal/screen.c b/src/terminal/screen.c
index 4ac6dc69..162c454b 100644
--- a/src/terminal/screen.c
+++ b/src/terminal/screen.c
@@ -223,6 +223,9 @@ struct screen_driver {
 		 * is the same as is_cp_utf8(charsets[0]), except the
 		 * latter might crash if UTF-8 I/O is disabled. */
 		unsigned int utf8_cp:1;
+
+		/* Whether the terminal supports combining characters. */
+		unsigned int combine:1;
 #endif /* CONFIG_UTF8 */
 	} opt;
 
@@ -240,6 +243,7 @@ static const struct screen_driver_opt dumb_screen_driver_opt = {
 	/* transparent: */ 1,
 #ifdef CONFIG_UTF8
 	/* utf8_cp: */ 0,
+	/* combine: */ 0,
 #endif /* CONFIG_UTF8 */
 };
 
@@ -253,6 +257,7 @@ static const struct screen_driver_opt vt100_screen_driver_opt = {
 	/* transparent: */ 1,
 #ifdef CONFIG_UTF8
 	/* utf8_cp: */ 0,
+	/* combine: */ 0,
 #endif /* CONFIG_UTF8 */
 };
 
@@ -266,6 +271,7 @@ static const struct screen_driver_opt linux_screen_driver_opt = {
 	/* transparent: */ 1,
 #ifdef CONFIG_UTF8
 	/* utf8_cp: */ 0,
+	/* combine: */ 0,
 #endif /* CONFIG_UTF8 */
 };
 
@@ -279,6 +285,7 @@ static const struct screen_driver_opt koi8_screen_driver_opt = {
 	/* transparent: */ 1,
 #ifdef CONFIG_UTF8
 	/* utf8_cp: */ 0,
+	/* combine: */ 0,
 #endif /* CONFIG_UTF8 */
 };
 
@@ -292,6 +299,7 @@ static const struct screen_driver_opt freebsd_screen_driver_opt = {
 	/* transparent: */ 1,
 #ifdef CONFIG_UTF8
 	/* utf8_cp: */ 0,
+	/* combine: */ 0,
 #endif /* CONFIG_UTF8 */
 };
 
@@ -325,6 +333,7 @@ set_screen_driver_opt(struct screen_driver *driver, struct option *term_spec)
 	copy_struct(&driver->opt, screen_driver_opts[driver->type]);
 
 #ifdef CONFIG_UTF8
+	driver->opt.combine = get_opt_bool_tree(term_spec, "combine", NULL);
 	/* Force UTF-8 I/O if the UTF-8 charset is selected. Various
 	 * places assume that the terminal's charset is unibyte if
 	 * UTF-8 I/O is disabled. (bug 827) */
@@ -639,6 +648,21 @@ add_char_data(struct string *screen, struct screen_driver *driver,
 	}
 
 	if (data == UCS_NO_CHAR) return;
+	if (data >= UCS_BEGIN_COMBINED && data <= last_combined) {
+		unicode_val_T *text = combined[data - UCS_BEGIN_COMBINED];
+
+		if (driver->opt.combine) {
+			/* XTerm */
+			while (*text != UCS_END_COMBINED) {
+				add_to_string(screen, encode_utf8(*text));
+				text++;
+			}
+			return;
+		} else {
+			/* Others */
+			data = *text;
+		}
+	}
 	if (!isscreensafe_ucs(data)) data = UCS_SPACE;
 	add_to_string(screen, encode_utf8(data));
 