From 0d8632943d8fb526fbd4d853c86294d0037c43f6 Mon Sep 17 00:00:00 2001 From: dequis Date: Thu, 23 Aug 2018 02:02:33 -0300 Subject: [PATCH] Add a wrapper of wcwidth() that picks the best implementation This adds an i_wcwidth() function that replaces mk_wcwidth(), and a 'wcwidth_implementation' setting to pick which one it wraps. Values: - old: uses our local mk_wcwidth() which implements unicode 5.0 - system: uses the libc-provided wcwidth(), which may be better or worse than ours depending on how up to date the system is. - auto: tests the system one against two characters that became fullwidth in unicode 5.2 and 9.0 respectively. If either of them passes, pick the system implementation, otherwise pick ours. It defaults to auto. mk_wcwidth() is still preferable in some cases, since the way it uses ranges for fullwidth characters means most CJK blocks are covered even if their characters didn't exist back then. The "system" implementation is also wrapped to never return -1, but to assume those unknown characters use one cell. Quoting the code: /* Treat all unknown characters as taking one cell. This is * the reason mk_wcwidth and other outdated implementations * mostly worked with newer unicode, while glibc's wcwidth * needs updating to recognize new characters. * * Instead of relying on that, we keep the behavior of assuming * one cell even for glibc's implementation, which is still * highly accurate and less of a headache overall. 
*/ --- src/core/Makefile.am | 1 + src/core/core.c | 5 ++ src/core/utf8.c | 2 +- src/core/utf8.h | 6 ++ src/core/wcwidth-wrapper.c | 117 ++++++++++++++++++++++++++++++++++ src/fe-text/gui-entry.c | 16 ++--- src/fe-text/term-terminfo.c | 4 +- src/fe-text/textbuffer-view.c | 2 +- 8 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 src/core/wcwidth-wrapper.c diff --git a/src/core/Makefile.am b/src/core/Makefile.am index f64d9e2e..4cc2226c 100644 --- a/src/core/Makefile.am +++ b/src/core/Makefile.am @@ -53,6 +53,7 @@ libcore_a_SOURCES = \ utf8.c \ $(regex_impl) \ wcwidth.c \ + wcwidth-wrapper.c \ tls.c \ write-buffer.c diff --git a/src/core/core.c b/src/core/core.c index 506d6a13..3c335fd2 100644 --- a/src/core/core.c +++ b/src/core/core.c @@ -60,6 +60,9 @@ void chat_commands_deinit(void); void log_away_init(void); void log_away_deinit(void); +void wcwidth_wrapper_init(void); +void wcwidth_wrapper_deinit(void); + int irssi_gui; int irssi_init_finished; int reload_config; @@ -258,6 +261,7 @@ void core_init(void) nicklist_init(); chat_commands_init(); + wcwidth_wrapper_init(); settings_add_str("misc", "ignore_signals", ""); settings_add_bool("misc", "override_coredump_limit", FALSE); @@ -281,6 +285,7 @@ void core_deinit(void) signal_remove("setup changed", (SIGNAL_FUNC) read_settings); signal_remove("irssi init finished", (SIGNAL_FUNC) sig_irssi_init_finished); + wcwidth_wrapper_deinit(); chat_commands_deinit(); nicklist_deinit(); diff --git a/src/core/utf8.c b/src/core/utf8.c index c53d8816..c44fdbd6 100644 --- a/src/core/utf8.c +++ b/src/core/utf8.c @@ -36,7 +36,7 @@ int string_advance(char const **str, int policy) c = g_utf8_get_char(*str); *str = g_utf8_next_char(*str); - return unichar_isprint(c) ? mk_wcwidth(c) : 1; + return unichar_isprint(c) ? 
i_wcwidth(c) : 1; } else { /* Assume TREAT_STRING_AS_BYTES: */ *str += 1; diff --git a/src/core/utf8.h b/src/core/utf8.h index 5bb53193..9f7d1aaa 100644 --- a/src/core/utf8.h +++ b/src/core/utf8.h @@ -12,8 +12,14 @@ typedef guint32 unichar; /* Returns width for character (0-2). */ +int i_wcwidth(unichar c); + +/* Older variant of the above */ int mk_wcwidth(unichar c); +/* Signature for wcwidth implementations */ +typedef int (*WCWIDTH_FUNC) (unichar ucs); + /* Advance the str pointer one character further; return the number of columns * occupied by the skipped character. */ diff --git a/src/core/wcwidth-wrapper.c b/src/core/wcwidth-wrapper.c new file mode 100644 index 00000000..e08707e9 --- /dev/null +++ b/src/core/wcwidth-wrapper.c @@ -0,0 +1,117 @@ +/* + wcwidth-wrapper.c : irssi + + Copyright (C) 2018 dequis + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ + +#define _XOPEN_SOURCE +#include <wchar.h> + +#include "module.h" +#include "signals.h" +#include "settings.h" +#include "utf8.h" + +/* wcwidth=2 since unicode 5.2.0 */ +#define UNICODE_SQUARE_HIRAGANA_HOKA 0x1F200 + +/* wcwidth=2 since unicode 9.0.0 */ +#define UNICODE_IRSSI_LOGO 0x1F525 + +enum { + WCWIDTH_IMPL_AUTO = 0, + WCWIDTH_IMPL_OLD, + WCWIDTH_IMPL_SYSTEM, +}; + +WCWIDTH_FUNC wcwidth_impl_func = mk_wcwidth; + +int i_wcwidth(unichar ucs) +{ + return (*wcwidth_impl_func)(ucs); +} + +static int system_wcwidth(unichar ucs) +{ + int retval = wcwidth((wchar_t) ucs); + + if (retval < 0) { + /* Treat all unknown characters as taking one cell. This is + * the reason mk_wcwidth and other outdated implementations + * mostly worked with newer unicode, while glibc's wcwidth + * needs updating to recognize new characters. + * + * Instead of relying on that, we keep the behavior of assuming + * one cell even for glibc's implementation, which is still + * highly accurate and less of a headache overall. 
+ */ + return 1; + } + + return retval; +} + +static void read_settings(void) +{ + static int choice = -1; + int newchoice; + + newchoice = settings_get_choice("wcwidth_implementation"); + + if (choice == newchoice) { + return; + } + + choice = newchoice; + + switch (choice) { + case WCWIDTH_IMPL_AUTO: + /* Test against characters that have wcwidth=2 + * since unicode 5.2 and 9.0 respectively */ + + if (system_wcwidth(UNICODE_SQUARE_HIRAGANA_HOKA) == 2 || + system_wcwidth(UNICODE_IRSSI_LOGO) == 2) { + wcwidth_impl_func = &system_wcwidth; + } else { + /* Fall back to our own (which implements 5.0) */ + wcwidth_impl_func = &mk_wcwidth; + } + break; + + case WCWIDTH_IMPL_OLD: + wcwidth_impl_func = &mk_wcwidth; + break; + + case WCWIDTH_IMPL_SYSTEM: + wcwidth_impl_func = &system_wcwidth; + break; + } + +} + +void wcwidth_wrapper_init(void) +{ + settings_add_choice("misc", "wcwidth_implementation", WCWIDTH_IMPL_AUTO, "auto;old;system"); + + read_settings(); + signal_add("setup changed", (SIGNAL_FUNC) read_settings); +} + +void wcwidth_wrapper_deinit(void) +{ + signal_remove("setup changed", (SIGNAL_FUNC) read_settings); +} diff --git a/src/fe-text/gui-entry.c b/src/fe-text/gui-entry.c index 52a39969..b3c73c15 100644 --- a/src/fe-text/gui-entry.c +++ b/src/fe-text/gui-entry.c @@ -51,7 +51,7 @@ static unichar i_tolower(unichar c) static int i_isalnum(unichar c) { if (term_type == TERM_TYPE_UTF8) - return (g_unichar_isalnum(c) || mk_wcwidth(c) == 0); + return (g_unichar_isalnum(c) || i_wcwidth(c) == 0); return c <= 255 ? isalnum(c) : 0; } @@ -219,7 +219,7 @@ static int pos2scrpos(GUI_ENTRY_REC *entry, int pos, int cursor) if (term_type == TERM_TYPE_BIG5) xpos += big5_width(c); else if (entry->utf8) - xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1; + xpos += unichar_isprint(c) ? 
i_wcwidth(c) : 1; else xpos++; @@ -246,7 +246,7 @@ static int scrpos2pos(GUI_ENTRY_REC *entry, int pos) if (term_type == TERM_TYPE_BIG5) width = big5_width(c); else if (entry->utf8) - width = unichar_isprint(c) ? mk_wcwidth(c) : 1; + width = unichar_isprint(c) ? i_wcwidth(c) : 1; else width = 1; @@ -373,7 +373,7 @@ static void gui_entry_draw_from(GUI_ENTRY_REC *entry, int pos) else if (term_type == TERM_TYPE_BIG5) new_xpos += big5_width(c); else if (entry->utf8) - new_xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1; + new_xpos += unichar_isprint(c) ? i_wcwidth(c) : 1; else new_xpos++; @@ -647,7 +647,7 @@ void gui_entry_insert_char(GUI_ENTRY_REC *entry, unichar chr) if (chr == 0 || chr == 13 || chr == 10) return; /* never insert NUL, CR or LF characters */ - if (entry->utf8 && entry->pos == 0 && mk_wcwidth(chr) == 0) + if (entry->utf8 && entry->pos == 0 && i_wcwidth(chr) == 0) return; gui_entry_redraw_from(entry, entry->pos); @@ -829,7 +829,7 @@ void gui_entry_erase(GUI_ENTRY_REC *entry, int size, CUTBUFFER_UPDATE_OP update_ if (entry->utf8) while (entry->pos-size-w > 0 && - mk_wcwidth(entry->text[entry->pos-size-w]) == 0) w++; + i_wcwidth(entry->text[entry->pos-size-w]) == 0) w++; g_memmove(entry->text + entry->pos - size, entry->text + entry->pos, (entry->text_len-entry->pos+1) * sizeof(unichar)); @@ -867,7 +867,7 @@ void gui_entry_erase_cell(GUI_ENTRY_REC *entry) if (entry->utf8) while (entry->pos+size < entry->text_len && - mk_wcwidth(entry->text[entry->pos+size]) == 0) size++; + i_wcwidth(entry->text[entry->pos+size]) == 0) size++; g_memmove(entry->text + entry->pos, entry->text + entry->pos + size, (entry->text_len-entry->pos-size+1) * sizeof(unichar)); @@ -1188,7 +1188,7 @@ void gui_entry_move_pos(GUI_ENTRY_REC *entry, int pos) if (entry->utf8) { int step = pos < 0 ? 
-1 : 1; - while(mk_wcwidth(entry->text[entry->pos]) == 0 && + while(i_wcwidth(entry->text[entry->pos]) == 0 && entry->pos + step >= 0 && entry->pos + step <= entry->text_len) entry->pos += step; } diff --git a/src/fe-text/term-terminfo.c b/src/fe-text/term-terminfo.c index 68947f0c..6e031ebf 100644 --- a/src/fe-text/term-terminfo.c +++ b/src/fe-text/term-terminfo.c @@ -515,7 +515,7 @@ void term_add_unichar(TERM_WINDOW *window, unichar chr) switch (term_type) { case TERM_TYPE_UTF8: - term_printed_text(unichar_isprint(chr) ? mk_wcwidth(chr) : 1); + term_printed_text(unichar_isprint(chr) ? i_wcwidth(chr) : 1); term_addch_utf8(window, chr); break; case TERM_TYPE_BIG5: @@ -558,7 +558,7 @@ int term_addstr(TERM_WINDOW *window, const char *str) len++; ptr++; } else { - len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1; + len += unichar_isprint(tmp) ? i_wcwidth(tmp) : 1; ptr = g_utf8_next_char(ptr); } } diff --git a/src/fe-text/textbuffer-view.c b/src/fe-text/textbuffer-view.c index 3ccd95f5..dda2ed97 100644 --- a/src/fe-text/textbuffer-view.c +++ b/src/fe-text/textbuffer-view.c @@ -197,7 +197,7 @@ static inline unichar read_unichar(const unsigned char *data, const unsigned cha *width = 1; } else { *next = (unsigned char *)g_utf8_next_char(data); - *width = unichar_isprint(chr) ? mk_wcwidth(chr) : 1; + *width = unichar_isprint(chr) ? i_wcwidth(chr) : 1; } return chr; }