mirror of
https://github.com/irssi/irssi.git
synced 2024-12-04 14:46:39 -05:00
Add a wrapper of wcwidth() that picks the best implementation
This adds an i_wcwidth() function that replaces mk_wcwidth(), and a 'wcwidth_implementation' setting to pick which implementation it wraps. Values:
- old: uses our local mk_wcwidth(), which implements Unicode 5.0.
- system: uses the libc-provided wcwidth(), which may be better or worse than ours depending on how up to date the system is.
- auto: tests the system one against two characters that became fullwidth in Unicode 5.2 and 9.0 respectively. If either of them passes, pick the system implementation; otherwise pick ours.
It defaults to auto. mk_wcwidth() is still preferable in some cases, since the way it uses ranges for fullwidth characters means most CJK blocks are covered even if their characters didn't exist back then. The "system" implementation is also wrapped to never return -1, but to assume those unknown characters use one cell. Quoting the code:
/* Treat all unknown characters as taking one cell. This is
 * the reason mk_wcwidth and other outdated implementations
 * mostly worked with newer unicode, while glibc's wcwidth
 * needs updating to recognize new characters.
 *
 * Instead of relying on that, we keep the behavior of assuming
 * one cell even for glibc's implementation, which is still
 * highly accurate and less of a headache overall.
 */
This commit is contained in:
parent
19d84bc16e
commit
0d8632943d
@ -53,6 +53,7 @@ libcore_a_SOURCES = \
|
||||
utf8.c \
|
||||
$(regex_impl) \
|
||||
wcwidth.c \
|
||||
wcwidth-wrapper.c \
|
||||
tls.c \
|
||||
write-buffer.c
|
||||
|
||||
|
@ -60,6 +60,9 @@ void chat_commands_deinit(void);
|
||||
void log_away_init(void);
|
||||
void log_away_deinit(void);
|
||||
|
||||
void wcwidth_wrapper_init(void);
|
||||
void wcwidth_wrapper_deinit(void);
|
||||
|
||||
int irssi_gui;
|
||||
int irssi_init_finished;
|
||||
int reload_config;
|
||||
@ -258,6 +261,7 @@ void core_init(void)
|
||||
nicklist_init();
|
||||
|
||||
chat_commands_init();
|
||||
wcwidth_wrapper_init();
|
||||
|
||||
settings_add_str("misc", "ignore_signals", "");
|
||||
settings_add_bool("misc", "override_coredump_limit", FALSE);
|
||||
@ -281,6 +285,7 @@ void core_deinit(void)
|
||||
signal_remove("setup changed", (SIGNAL_FUNC) read_settings);
|
||||
signal_remove("irssi init finished", (SIGNAL_FUNC) sig_irssi_init_finished);
|
||||
|
||||
wcwidth_wrapper_deinit();
|
||||
chat_commands_deinit();
|
||||
|
||||
nicklist_deinit();
|
||||
|
@ -36,7 +36,7 @@ int string_advance(char const **str, int policy)
|
||||
c = g_utf8_get_char(*str);
|
||||
*str = g_utf8_next_char(*str);
|
||||
|
||||
return unichar_isprint(c) ? mk_wcwidth(c) : 1;
|
||||
return unichar_isprint(c) ? i_wcwidth(c) : 1;
|
||||
} else {
|
||||
/* Assume TREAT_STRING_AS_BYTES: */
|
||||
*str += 1;
|
||||
|
@ -12,8 +12,14 @@
|
||||
typedef guint32 unichar;
|
||||
|
||||
/* Returns width for character (0-2). */
|
||||
int i_wcwidth(unichar c);
|
||||
|
||||
/* Older variant of the above */
|
||||
int mk_wcwidth(unichar c);
|
||||
|
||||
/* Signature for wcwidth implementations */
|
||||
typedef int (*WCWIDTH_FUNC) (unichar ucs);
|
||||
|
||||
/* Advance the str pointer one character further; return the number of columns
|
||||
* occupied by the skipped character.
|
||||
*/
|
||||
|
117
src/core/wcwidth-wrapper.c
Normal file
117
src/core/wcwidth-wrapper.c
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
wcwidth-wrapper.c : irssi
|
||||
|
||||
Copyright (C) 2018 dequis
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE
|
||||
#include <wchar.h>
|
||||
|
||||
#include "module.h"
|
||||
#include "signals.h"
|
||||
#include "settings.h"
|
||||
#include "utf8.h"
|
||||
|
||||
/* wcwidth=2 since unicode 5.2.0 */
|
||||
#define UNICODE_SQUARE_HIRAGANA_HOKA 0x1F200
|
||||
|
||||
/* wcwidth=2 since unicode 9.0.0 */
|
||||
#define UNICODE_IRSSI_LOGO 0x1F525
|
||||
|
||||
/* Possible values of the 'wcwidth_implementation' setting.
 *
 * NOTE(review): the numeric values presumably have to stay in the same
 * order as the "auto;old;system" option string passed to
 * settings_add_choice() -- confirm before reordering. */
enum {
	WCWIDTH_IMPL_AUTO = 0,   /* probe the system wcwidth(), else use ours */
	WCWIDTH_IMPL_OLD = 1,    /* always use the bundled mk_wcwidth() */
	WCWIDTH_IMPL_SYSTEM = 2  /* always use the libc wcwidth() wrapper */
};
|
||||
|
||||
/* Implementation currently backing i_wcwidth(). It starts out as the
 * bundled mk_wcwidth() and is re-pointed by read_settings(). */
WCWIDTH_FUNC wcwidth_impl_func = &mk_wcwidth;
|
||||
|
||||
/* Return the number of terminal cells used by `ucs`, as reported by
 * whichever implementation wcwidth_impl_func currently points to. */
int i_wcwidth(unichar ucs)
{
	return wcwidth_impl_func(ucs);
}
|
||||
|
||||
/* Wrapper around the C library's wcwidth() that never returns a
 * negative width: a negative result is reported as one cell. */
static int system_wcwidth(unichar ucs)
{
	int cells = wcwidth((wchar_t) ucs);

	/* Treat all unknown characters as taking one cell. This is
	 * the reason mk_wcwidth and other outdated implementations
	 * mostly worked with newer unicode, while glibc's wcwidth
	 * needs updating to recognize new characters.
	 *
	 * Instead of relying on that, we keep the behavior of assuming
	 * one cell even for glibc's implementation, which is still
	 * highly accurate and less of a headache overall.
	 */
	return cells < 0 ? 1 : cells;
}
|
||||
|
||||
static void read_settings(void)
|
||||
{
|
||||
static int choice = -1;
|
||||
int newchoice;
|
||||
|
||||
newchoice = settings_get_choice("wcwidth_implementation");
|
||||
|
||||
if (choice == newchoice) {
|
||||
return;
|
||||
}
|
||||
|
||||
choice = newchoice;
|
||||
|
||||
switch (choice) {
|
||||
case WCWIDTH_IMPL_AUTO:
|
||||
/* Test against characters that have wcwidth=2
|
||||
* since unicode 5.2 and 9.0 respectively */
|
||||
|
||||
if (system_wcwidth(UNICODE_SQUARE_HIRAGANA_HOKA) == 2 ||
|
||||
system_wcwidth(UNICODE_IRSSI_LOGO) == 2) {
|
||||
wcwidth_impl_func = &system_wcwidth;
|
||||
} else {
|
||||
/* Fall back to our own (which implements 5.0) */
|
||||
wcwidth_impl_func = &mk_wcwidth;
|
||||
}
|
||||
break;
|
||||
|
||||
case WCWIDTH_IMPL_OLD:
|
||||
wcwidth_impl_func = &mk_wcwidth;
|
||||
break;
|
||||
|
||||
case WCWIDTH_IMPL_SYSTEM:
|
||||
wcwidth_impl_func = &system_wcwidth;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void wcwidth_wrapper_init(void)
|
||||
{
|
||||
settings_add_choice("misc", "wcwidth_implementation", WCWIDTH_IMPL_AUTO, "auto;old;system");
|
||||
|
||||
read_settings();
|
||||
signal_add("setup changed", (SIGNAL_FUNC) read_settings);
|
||||
}
|
||||
|
||||
void wcwidth_wrapper_deinit(void)
|
||||
{
|
||||
signal_remove("setup changed", (SIGNAL_FUNC) read_settings);
|
||||
}
|
@ -51,7 +51,7 @@ static unichar i_tolower(unichar c)
|
||||
static int i_isalnum(unichar c)
|
||||
{
|
||||
if (term_type == TERM_TYPE_UTF8)
|
||||
return (g_unichar_isalnum(c) || mk_wcwidth(c) == 0);
|
||||
return (g_unichar_isalnum(c) || i_wcwidth(c) == 0);
|
||||
return c <= 255 ? isalnum(c) : 0;
|
||||
}
|
||||
|
||||
@ -219,7 +219,7 @@ static int pos2scrpos(GUI_ENTRY_REC *entry, int pos, int cursor)
|
||||
if (term_type == TERM_TYPE_BIG5)
|
||||
xpos += big5_width(c);
|
||||
else if (entry->utf8)
|
||||
xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1;
|
||||
xpos += unichar_isprint(c) ? i_wcwidth(c) : 1;
|
||||
else
|
||||
xpos++;
|
||||
|
||||
@ -246,7 +246,7 @@ static int scrpos2pos(GUI_ENTRY_REC *entry, int pos)
|
||||
if (term_type == TERM_TYPE_BIG5)
|
||||
width = big5_width(c);
|
||||
else if (entry->utf8)
|
||||
width = unichar_isprint(c) ? mk_wcwidth(c) : 1;
|
||||
width = unichar_isprint(c) ? i_wcwidth(c) : 1;
|
||||
else
|
||||
width = 1;
|
||||
|
||||
@ -373,7 +373,7 @@ static void gui_entry_draw_from(GUI_ENTRY_REC *entry, int pos)
|
||||
else if (term_type == TERM_TYPE_BIG5)
|
||||
new_xpos += big5_width(c);
|
||||
else if (entry->utf8)
|
||||
new_xpos += unichar_isprint(c) ? mk_wcwidth(c) : 1;
|
||||
new_xpos += unichar_isprint(c) ? i_wcwidth(c) : 1;
|
||||
else
|
||||
new_xpos++;
|
||||
|
||||
@ -647,7 +647,7 @@ void gui_entry_insert_char(GUI_ENTRY_REC *entry, unichar chr)
|
||||
if (chr == 0 || chr == 13 || chr == 10)
|
||||
return; /* never insert NUL, CR or LF characters */
|
||||
|
||||
if (entry->utf8 && entry->pos == 0 && mk_wcwidth(chr) == 0)
|
||||
if (entry->utf8 && entry->pos == 0 && i_wcwidth(chr) == 0)
|
||||
return;
|
||||
|
||||
gui_entry_redraw_from(entry, entry->pos);
|
||||
@ -829,7 +829,7 @@ void gui_entry_erase(GUI_ENTRY_REC *entry, int size, CUTBUFFER_UPDATE_OP update_
|
||||
|
||||
if (entry->utf8)
|
||||
while (entry->pos-size-w > 0 &&
|
||||
mk_wcwidth(entry->text[entry->pos-size-w]) == 0) w++;
|
||||
i_wcwidth(entry->text[entry->pos-size-w]) == 0) w++;
|
||||
|
||||
g_memmove(entry->text + entry->pos - size, entry->text + entry->pos,
|
||||
(entry->text_len-entry->pos+1) * sizeof(unichar));
|
||||
@ -867,7 +867,7 @@ void gui_entry_erase_cell(GUI_ENTRY_REC *entry)
|
||||
|
||||
if (entry->utf8)
|
||||
while (entry->pos+size < entry->text_len &&
|
||||
mk_wcwidth(entry->text[entry->pos+size]) == 0) size++;
|
||||
i_wcwidth(entry->text[entry->pos+size]) == 0) size++;
|
||||
|
||||
g_memmove(entry->text + entry->pos, entry->text + entry->pos + size,
|
||||
(entry->text_len-entry->pos-size+1) * sizeof(unichar));
|
||||
@ -1188,7 +1188,7 @@ void gui_entry_move_pos(GUI_ENTRY_REC *entry, int pos)
|
||||
|
||||
if (entry->utf8) {
|
||||
int step = pos < 0 ? -1 : 1;
|
||||
while(mk_wcwidth(entry->text[entry->pos]) == 0 &&
|
||||
while(i_wcwidth(entry->text[entry->pos]) == 0 &&
|
||||
entry->pos + step >= 0 && entry->pos + step <= entry->text_len)
|
||||
entry->pos += step;
|
||||
}
|
||||
|
@ -515,7 +515,7 @@ void term_add_unichar(TERM_WINDOW *window, unichar chr)
|
||||
|
||||
switch (term_type) {
|
||||
case TERM_TYPE_UTF8:
|
||||
term_printed_text(unichar_isprint(chr) ? mk_wcwidth(chr) : 1);
|
||||
term_printed_text(unichar_isprint(chr) ? i_wcwidth(chr) : 1);
|
||||
term_addch_utf8(window, chr);
|
||||
break;
|
||||
case TERM_TYPE_BIG5:
|
||||
@ -558,7 +558,7 @@ int term_addstr(TERM_WINDOW *window, const char *str)
|
||||
len++;
|
||||
ptr++;
|
||||
} else {
|
||||
len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1;
|
||||
len += unichar_isprint(tmp) ? i_wcwidth(tmp) : 1;
|
||||
ptr = g_utf8_next_char(ptr);
|
||||
}
|
||||
}
|
||||
|
@ -197,7 +197,7 @@ static inline unichar read_unichar(const unsigned char *data, const unsigned cha
|
||||
*width = 1;
|
||||
} else {
|
||||
*next = (unsigned char *)g_utf8_next_char(data);
|
||||
*width = unichar_isprint(chr) ? mk_wcwidth(chr) : 1;
|
||||
*width = unichar_isprint(chr) ? i_wcwidth(chr) : 1;
|
||||
}
|
||||
return chr;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user