1
0
mirror of https://github.com/irssi/irssi.git synced 2024-12-04 14:46:39 -05:00

Add a wrapper of wcwidth() that picks the best implementation

This adds an i_wcwidth() function that replaces mk_wcwidth(), and a
'wcwidth_implementation' setting to pick which one it wraps.

Values:

- old: uses our local mk_wcwidth() which implements unicode 5.0
- system: uses the libc-provided wcwidth(), which may be better or worse
  than ours depending on how up to date the system is.
- auto: tests the system one against two characters that became
  fullwidth in unicode 5.2 and 9.0 respectively. If either of them passes,
  pick the system implementation, otherwise pick ours.

It defaults to auto.

mk_wcwidth() is still preferable in some cases, since the way it uses
ranges for fullwidth characters means most CJK blocks are covered even
if their characters didn't exist back then.

The "system" implementation is also wrapped to never return -1, but to
assume those unknown characters use one cell. Quoting the code:

    /* Treat all unknown characters as taking one cell. This is
     * the reason mk_wcwidth and other outdated implementations
     * mostly worked with newer unicode, while glibc's wcwidth
     * needs updating to recognize new characters.
     *
     * Instead of relying on that, we keep the behavior of assuming
     * one cell even for glibc's implementation, which is still
     * highly accurate and less of a headache overall.
     */
This commit is contained in:
dequis 2018-08-23 02:02:33 -03:00
parent 19d84bc16e
commit 0d8632943d
8 changed files with 141 additions and 12 deletions

View File

@ -53,6 +53,7 @@ libcore_a_SOURCES = \
utf8.c \
$(regex_impl) \
wcwidth.c \
wcwidth-wrapper.c \
tls.c \
write-buffer.c

View File

@ -60,6 +60,9 @@ void chat_commands_deinit(void);
void log_away_init(void);
void log_away_deinit(void);
void wcwidth_wrapper_init(void);
void wcwidth_wrapper_deinit(void);
int irssi_gui;
int irssi_init_finished;
int reload_config;
@ -258,6 +261,7 @@ void core_init(void)
nicklist_init();
chat_commands_init();
wcwidth_wrapper_init();
settings_add_str("misc", "ignore_signals", "");
settings_add_bool("misc", "override_coredump_limit", FALSE);
@ -281,6 +285,7 @@ void core_deinit(void)
signal_remove("setup changed", (SIGNAL_FUNC) read_settings);
signal_remove("irssi init finished", (SIGNAL_FUNC) sig_irssi_init_finished);
wcwidth_wrapper_deinit();
chat_commands_deinit();
nicklist_deinit();

View File

@ -36,7 +36,7 @@ int string_advance(char const **str, int policy)
c = g_utf8_get_char(*str);
*str = g_utf8_next_char(*str);
return unichar_isprint(c) ? mk_wcwidth(c) : 1;
return unichar_isprint(c) ? i_wcwidth(c) : 1;
} else {
/* Assume TREAT_STRING_AS_BYTES: */
*str += 1;

View File

@ -12,8 +12,14 @@
typedef guint32 unichar;
/* Returns width for character (0-2). */
int i_wcwidth(unichar c);
/* Older variant of the above */
int mk_wcwidth(unichar c);
/* Signature for wcwidth implementations */
typedef int (*WCWIDTH_FUNC) (unichar ucs);
/* Advance the str pointer one character further; return the number of columns
* occupied by the skipped character.
*/

117
src/core/wcwidth-wrapper.c Normal file
View File

@ -0,0 +1,117 @@
/*
wcwidth-wrapper.c : irssi
Copyright (C) 2018 dequis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#define _XOPEN_SOURCE
#include <wchar.h>
#include "module.h"
#include "signals.h"
#include "settings.h"
#include "utf8.h"
/* Probe code points used by read_settings() in "auto" mode: the system
 * wcwidth() is asked for their widths to judge how recent its data is. */
/* wcwidth=2 since unicode 5.2.0 */
#define UNICODE_SQUARE_HIRAGANA_HOKA 0x1F200
/* wcwidth=2 since unicode 9.0.0 */
#define UNICODE_IRSSI_LOGO 0x1F525
/* Values of the "wcwidth_implementation" setting as compared against in
 * read_settings(). NOTE(review): presumably these must stay in the same
 * order as the "auto;old;system" list passed to settings_add_choice() --
 * confirm against settings_get_choice(). */
enum {
WCWIDTH_IMPL_AUTO = 0,
WCWIDTH_IMPL_OLD,
WCWIDTH_IMPL_SYSTEM,
};
/* Implementation that i_wcwidth() forwards to. Starts out as mk_wcwidth and
 * is reassigned by read_settings() whenever the setting changes. */
WCWIDTH_FUNC wcwidth_impl_func = mk_wcwidth;
/* Returns the width of `ucs` as computed by whichever implementation
 * wcwidth_impl_func currently points at. */
int i_wcwidth(unichar ucs)
{
	WCWIDTH_FUNC impl = wcwidth_impl_func;

	return impl(ucs);
}
/*
 * The C library's wcwidth(), adapted to the WCWIDTH_FUNC signature.
 *
 * Returns whatever the system reports for `ucs`, except that:
 *  - a negative result (character unknown to the system) becomes 1;
 *  - a code point that cannot be represented in wchar_t also yields 1,
 *    instead of being truncated by the cast and measured as some
 *    unrelated character (relevant where wchar_t is only 16 bits wide).
 */
static int system_wcwidth(unichar ucs)
{
	int retval;

	/* Casting an out-of-range value to wchar_t would silently change
	 * which character is being measured, so bail out first. */
	if ((unsigned long) ucs > (unsigned long) WCHAR_MAX) {
		return 1;
	}

	retval = wcwidth((wchar_t) ucs);

	if (retval < 0) {
		/* Treat all unknown characters as taking one cell. This is
		 * the reason mk_wcwidth and other outdated implementations
		 * mostly worked with newer unicode, while glibc's wcwidth
		 * needs updating to recognize new characters.
		 *
		 * Instead of relying on that, we keep the behavior of assuming
		 * one cell even for glibc's implementation, which is still
		 * highly accurate and less of a headache overall.
		 */
		return 1;
	}

	return retval;
}
/*
 * Re-reads the "wcwidth_implementation" setting and repoints
 * wcwidth_impl_func accordingly. The last seen value is remembered, so the
 * function returns early when the setting has not changed since the
 * previous call. Values other than the three WCWIDTH_IMPL_* constants
 * leave wcwidth_impl_func untouched.
 */
static void read_settings(void)
{
	static int current = -1;
	int requested = settings_get_choice("wcwidth_implementation");

	if (requested == current) {
		return;
	}
	current = requested;

	if (current == WCWIDTH_IMPL_AUTO) {
		/* Probe the system implementation with characters that have
		 * wcwidth=2 since unicode 5.2 and 9.0 respectively; one hit
		 * is enough to prefer it over our own (which implements 5.0). */
		int system_is_recent =
			system_wcwidth(UNICODE_SQUARE_HIRAGANA_HOKA) == 2 ||
			system_wcwidth(UNICODE_IRSSI_LOGO) == 2;

		wcwidth_impl_func = system_is_recent ? &system_wcwidth : &mk_wcwidth;
	} else if (current == WCWIDTH_IMPL_OLD) {
		wcwidth_impl_func = &mk_wcwidth;
	} else if (current == WCWIDTH_IMPL_SYSTEM) {
		wcwidth_impl_func = &system_wcwidth;
	}
}
/* Registers the "wcwidth_implementation" choice setting (auto, old or system;
 * default auto), applies its current value once, and arranges for
 * read_settings() to run on every "setup changed" signal. */
void wcwidth_wrapper_init(void)
{
	settings_add_choice("misc", "wcwidth_implementation",
	                    WCWIDTH_IMPL_AUTO, "auto;old;system");

	/* Pick the implementation right away, then track later changes. */
	read_settings();
	signal_add("setup changed", (SIGNAL_FUNC) read_settings);
}
/* Undoes the signal registration made by wcwidth_wrapper_init(): detaches
 * read_settings() from the "setup changed" signal. */
void wcwidth_wrapper_deinit(void)
{
	signal_remove("setup changed",
	              (SIGNAL_FUNC) read_settings);
}

View File

@ -51,7 +51,7 @@ static unichar i_tolower(unichar c)
static int i_isalnum(unichar c)
{
if (term_type == TERM_TYPE_UTF8)
return (g_unichar_isalnum(c) || mk_wcwidth(c) == 0);
return (g_unichar_isalnum(c) || i_wcwidth(c) == 0);
return c <= 255 ? isalnum(c) : 0;
}
@ -219,7 +219,7 @@ static int pos2scrpos(GUI_ENTRY_REC *entry, int pos, int cursor)
if (term_type == TERM_TYPE_BIG5)
xpos += big5_width(c);
else if (entry->utf8)
xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1;
xpos += unichar_isprint(c) ? i_wcwidth(c) : 1;
else
xpos++;
@ -246,7 +246,7 @@ static int scrpos2pos(GUI_ENTRY_REC *entry, int pos)
if (term_type == TERM_TYPE_BIG5)
width = big5_width(c);
else if (entry->utf8)
width = unichar_isprint(c) ? mk_wcwidth(c) : 1;
width = unichar_isprint(c) ? i_wcwidth(c) : 1;
else
width = 1;
@ -373,7 +373,7 @@ static void gui_entry_draw_from(GUI_ENTRY_REC *entry, int pos)
else if (term_type == TERM_TYPE_BIG5)
new_xpos += big5_width(c);
else if (entry->utf8)
new_xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1;
new_xpos += unichar_isprint(c) ? i_wcwidth(c) : 1;
else
new_xpos++;
@ -647,7 +647,7 @@ void gui_entry_insert_char(GUI_ENTRY_REC *entry, unichar chr)
if (chr == 0 || chr == 13 || chr == 10)
return; /* never insert NUL, CR or LF characters */
if (entry->utf8 && entry->pos == 0 && mk_wcwidth(chr) == 0)
if (entry->utf8 && entry->pos == 0 && i_wcwidth(chr) == 0)
return;
gui_entry_redraw_from(entry, entry->pos);
@ -829,7 +829,7 @@ void gui_entry_erase(GUI_ENTRY_REC *entry, int size, CUTBUFFER_UPDATE_OP update_
if (entry->utf8)
while (entry->pos-size-w > 0 &&
mk_wcwidth(entry->text[entry->pos-size-w]) == 0) w++;
i_wcwidth(entry->text[entry->pos-size-w]) == 0) w++;
g_memmove(entry->text + entry->pos - size, entry->text + entry->pos,
(entry->text_len-entry->pos+1) * sizeof(unichar));
@ -867,7 +867,7 @@ void gui_entry_erase_cell(GUI_ENTRY_REC *entry)
if (entry->utf8)
while (entry->pos+size < entry->text_len &&
mk_wcwidth(entry->text[entry->pos+size]) == 0) size++;
i_wcwidth(entry->text[entry->pos+size]) == 0) size++;
g_memmove(entry->text + entry->pos, entry->text + entry->pos + size,
(entry->text_len-entry->pos-size+1) * sizeof(unichar));
@ -1188,7 +1188,7 @@ void gui_entry_move_pos(GUI_ENTRY_REC *entry, int pos)
if (entry->utf8) {
int step = pos < 0 ? -1 : 1;
while(mk_wcwidth(entry->text[entry->pos]) == 0 &&
while(i_wcwidth(entry->text[entry->pos]) == 0 &&
entry->pos + step >= 0 && entry->pos + step <= entry->text_len)
entry->pos += step;
}

View File

@ -515,7 +515,7 @@ void term_add_unichar(TERM_WINDOW *window, unichar chr)
switch (term_type) {
case TERM_TYPE_UTF8:
term_printed_text(unichar_isprint(chr) ? mk_wcwidth(chr) : 1);
term_printed_text(unichar_isprint(chr) ? i_wcwidth(chr) : 1);
term_addch_utf8(window, chr);
break;
case TERM_TYPE_BIG5:
@ -558,7 +558,7 @@ int term_addstr(TERM_WINDOW *window, const char *str)
len++;
ptr++;
} else {
len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1;
len += unichar_isprint(tmp) ? i_wcwidth(tmp) : 1;
ptr = g_utf8_next_char(ptr);
}
}

View File

@ -197,7 +197,7 @@ static inline unichar read_unichar(const unsigned char *data, const unsigned cha
*width = 1;
} else {
*next = (unsigned char *)g_utf8_next_char(data);
*width = unichar_isprint(chr) ? mk_wcwidth(chr) : 1;
*width = unichar_isprint(chr) ? i_wcwidth(chr) : 1;
}
return chr;
}