1
0
mirror of https://github.com/irssi/irssi.git synced 2024-11-03 04:27:19 -05:00

Merge pull request #480 from xavierog/handle-utf8-nicks-with-mk_wcwidth

Handle utf8 nicks with mk_wcwidth()
This commit is contained in:
ailin-nemui 2016-05-18 09:18:21 +02:00
commit 74d38683bf
14 changed files with 260 additions and 139 deletions

View File

@ -44,6 +44,7 @@ libcore_a_SOURCES = \
settings.c \
signals.c \
special-vars.c \
utf8.c \
write-buffer.c
structure_headers = \

View File

@ -25,10 +25,7 @@
#include "settings.h"
#include "servers.h"
#include "misc.h"
#define ALIGN_RIGHT 0x01
#define ALIGN_CUT 0x02
#define ALIGN_PAD 0x04
#include "utf8.h"
#define isvarchar(c) \
(i_isalnum(c) || (c) == '_')
@ -316,22 +313,28 @@ static int get_alignment_args(char **data, int *align, int *flags, char *pad)
}
/* return the aligned text */
static char *get_alignment(const char *text, int align, int flags, char pad)
char *get_alignment(const char *text, int align, int flags, char pad)
{
GString *str;
char *ret;
int policy;
unsigned int cut_bytes;
g_return_val_if_fail(text != NULL, NULL);
policy = string_policy(text);
str = g_string_new(text);
/* cut */
if ((flags & ALIGN_CUT) && align > 0 && str->len > align)
g_string_truncate(str, align);
if ((flags & ALIGN_CUT) && align > 0 && string_width(text, policy) > align) {
string_chars_for_width(text, policy, align, &cut_bytes);
g_string_truncate(str, cut_bytes);
}
/* add pad characters */
if (flags & ALIGN_PAD) {
while (str->len < align) {
while (string_width(str->str, policy) < align) {
if (flags & ALIGN_RIGHT)
g_string_prepend_c(str, pad);
else

View File

@ -9,9 +9,16 @@
#define PARSE_FLAG_ESCAPE_THEME 0x08 /* if any arguments/variables contain { or } chars, escape them with % */
#define PARSE_FLAG_ONLY_ARGS 0x10 /* expand only arguments ($0 $1 etc.) but no other $variables */
#define ALIGN_RIGHT 0x01
#define ALIGN_CUT 0x02
#define ALIGN_PAD 0x04
typedef char* (*SPECIAL_HISTORY_FUNC)
(const char *text, void *item, int *free_ret);
/* Cut and/or pad text so it takes exactly "align" columns on the screen */
char *get_alignment(const char *text, int align, int flags, char pad);
/* Parse and expand text after '$' character. return value has to be
g_free()'d if `free_ret' is TRUE. */
char *parse_special(char **cmd, SERVER_REC *server, void *item,

135
src/core/utf8.c Normal file
View File

@ -0,0 +1,135 @@
/* utf8.c - Operations on UTF-8 strings.
*
* Copyright (C) 2002 Timo Sirainen
*
* Based on GLib code by
*
* Copyright (C) 1999 Tom Tromey
* Copyright (C) 2000 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "utf8.h"
#include "module.h"
#include "wcwidth.c"
/* Provide is_utf8(): */
#include "recode.h"
/* Move *str forward by one character and return the number of screen
 * columns taken by the character that was skipped.
 *
 * With TREAT_STRING_AS_UTF8 the character is decoded with GLib and measured
 * with mk_wcwidth(); characters rejected by unichar_isprint() count as one
 * column. Any other policy is handled as TREAT_STRING_AS_BYTES: one byte,
 * one column.
 */
int string_advance(char const **str, int policy)
{
	const char *cursor = *str;
	gunichar ch;

	if (policy != TREAT_STRING_AS_UTF8) {
		/* Byte-oriented string: every byte is one column wide. */
		*str = cursor + 1;
		return 1;
	}

	ch = g_utf8_get_char(cursor);
	*str = g_utf8_next_char(cursor);

	if (!unichar_isprint(ch))
		return 1;

	return mk_wcwidth(ch);
}
/* Decide how str should be processed.
 *
 * Returns TREAT_STRING_AS_UTF8 when is_utf8() reports true and str is either
 * NULL or passes g_utf8_validate(); returns TREAT_STRING_AS_BYTES in every
 * other case.
 */
int string_policy(const char *str)
{
	if (!is_utf8())
		return TREAT_STRING_AS_BYTES;

	/* A missing string cannot be invalid, so only validate real input. */
	if (str != NULL && !g_utf8_validate(str, -1, NULL))
		return TREAT_STRING_AS_BYTES;

	return TREAT_STRING_AS_UTF8;
}
/* Return the length of str: the number of UTF-8 characters when the policy
 * is TREAT_STRING_AS_UTF8, the number of bytes otherwise.
 *
 * A policy of -1 means "ask string_policy()". Returns 0 if str is NULL.
 */
int string_length(const char *str, int policy)
{
	int effective;

	g_return_val_if_fail(str != NULL, 0);

	effective = policy;
	if (effective == -1)
		effective = string_policy(str);

	if (effective != TREAT_STRING_AS_UTF8) {
		/* Anything else is handled as TREAT_STRING_AS_BYTES. */
		return strlen(str);
	}

	return g_utf8_strlen(str, -1);
}
/* Return the number of screen columns occupied by str, obtained by summing
 * the width string_advance() reports for each character.
 *
 * A policy of -1 means "ask string_policy()". Returns 0 if str is NULL.
 */
int string_width(const char *str, int policy)
{
	const char *pos;
	int columns = 0;

	g_return_val_if_fail(str != NULL, 0);

	if (policy == -1)
		policy = string_policy(str);

	/* string_advance() moves pos forward itself. */
	for (pos = str; *pos != '\0'; )
		columns += string_advance(&pos, policy);

	return columns;
}
/* Count how many leading characters of str fit within n screen columns.
 *
 * Characters are consumed with string_advance() until the next one would
 * push the accumulated width past n; that character is not counted.
 * If bytes is not NULL, it receives the byte length of the counted
 * characters. A policy of -1 means "ask string_policy()".
 *
 * Returns the number of characters counted, 0 when n is 0, or -1 if str is
 * NULL.
 */
int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes)
{
	const char *cursor, *char_start;
	int used_columns, step, counted;

	g_return_val_if_fail(str != NULL, -1);

	/* Nothing fits in zero columns. */
	if (n == 0) {
		if (bytes != NULL)
			*bytes = 0;
		return 0;
	}

	if (policy == -1)
		policy = string_policy(str);

	counted = 0;
	used_columns = 0;
	for (cursor = str; *cursor != '\0'; ) {
		char_start = cursor;
		step = string_advance(&cursor, policy);
		if (used_columns + step > n) {
			/* This character overflows n: rewind to its start. */
			cursor = char_start;
			break;
		}
		used_columns += step;
		counted++;
	}

	/* cursor now sits just after the last character that fits. */
	if (bytes != NULL)
		*bytes = cursor - str;

	return counted;
}

56
src/core/utf8.h Normal file
View File

@ -0,0 +1,56 @@
/* utf8.h - Declarations for measuring and walking strings either as UTF-8
 * or as plain bytes, plus a few byte-classification macros.
 */
#ifndef __UTF8_H
#define __UTF8_H
/* XXX I didn't check the encoding range of big5+. This is standard big5. */
/* Byte-range tests; "hi" and "lo" are presumably the first and second byte
 * of a two-byte Big5 sequence - confirm against callers. */
#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */
#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */
#define is_big5_lo(lo) ((is_big5_los(lo) || is_big5_lox(lo)))
#define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE)
#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo))
#include <glib.h>
/* 32-bit unsigned value holding one character code. */
typedef guint32 unichar;
/* Returns width for character (0-2). */
int mk_wcwidth(unichar c);
/* Advance the str pointer one character further; return the number of columns
 * occupied by the skipped character.
 * policy is one of the str_policy values below: with TREAT_STRING_AS_UTF8 a
 * whole UTF-8 character is skipped, otherwise a single byte counting as one
 * column.
 */
int string_advance(char const **str, int policy);
/* TREAT_STRING_AS_BYTES means strings are to be treated using strncpy,
 * strnlen, etc.
 * TREAT_STRING_AS_UTF8 means strings are to be treated using g_utf8_*
 * functions.
 */
enum str_policy {
TREAT_STRING_AS_BYTES,
TREAT_STRING_AS_UTF8
};
/* Return how the str string ought to be treated: TREAT_STRING_AS_UTF8 if the
 * terminal handles UTF-8 and if the string appears to be a valid UTF-8 string;
 * TREAT_STRING_AS_BYTES otherwise.
 * A NULL str is accepted and is not validated.
 */
int string_policy(const char *str);
/* Return the length of the str string according to the given policy
 * (characters for TREAT_STRING_AS_UTF8, bytes otherwise); if policy
 * is -1, this function will call string_policy(). Returns 0 if str is NULL.
 */
int string_length(const char *str, int policy);
/* Return the screen width of the str string according to the given policy; if
 * policy is -1, this function will call string_policy(). Returns 0 if str is
 * NULL.
 */
int string_width(const char *str, int policy);
/* Return the number of leading characters of str that fit within n columns
 * (a character that would exceed n is not counted), or -1 if str is NULL.
 * If bytes is not NULL, the byte length of those characters is stored there.
 * If policy is -1, this function will call string_policy().
 */
int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes);
/* True when c, with bit 0x80 cleared, is at least 32; this rejects the
 * ranges 0x00-0x1F and 0x80-0x9F. */
#define unichar_isprint(c) (((c) & ~0x80) >= 32)
/* True unless the two top bits of byte c are "10", i.e. unless c is a UTF-8
 * continuation byte. */
#define is_utf8_leading(c) (((c) & 0xc0) != 0x80)
#endif

View File

@ -24,8 +24,6 @@ libfe_common_core_a_SOURCES = \
fe-queries.c \
fe-server.c \
fe-settings.c \
utf8.c \
wcwidth.c \
formats.c \
hilight-text.c \
keyboard.c \
@ -62,6 +60,3 @@ pkginc_fe_common_core_HEADERS = \
window-items.h \
windows-layout.h \
fe-windows.h
noinst_HEADERS = \
utf8.h

View File

@ -26,6 +26,8 @@
#include "levels.h"
#include "misc.h"
#include "settings.h"
#include "special-vars.h"
#include "utf8.h"
#include "chat-protocols.h"
#include "chatnets.h"
@ -323,7 +325,7 @@ static void cmd_channel_remove(const char *data)
static int get_nick_length(void *data)
{
return strlen(((NICK_REC *) data)->nick);
return string_width(((NICK_REC *) data)->nick, -1);
}
static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
@ -333,9 +335,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
GString *str;
GSList *tmp;
char *format, *stripped, *prefix_format;
char *linebuf, nickmode[2] = { 0, 0 };
char *aligned_nick, nickmode[2] = { 0, 0 };
int *columns, cols, rows, last_col_rows, col, row, max_width;
int item_extra, linebuf_size, formatnum;
int item_extra, formatnum;
window = window_find_closest(channel->server, channel->visible_name,
MSGLEVEL_CLIENTCRAP);
@ -394,7 +396,6 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
last_col_rows = rows;
str = g_string_new(prefix_format);
linebuf_size = max_width+1; linebuf = g_malloc(linebuf_size);
col = 0; row = 0;
for (tmp = nicklist; tmp != NULL; tmp = tmp->next) {
@ -405,13 +406,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
else
nickmode[0] = ' ';
if (linebuf_size < columns[col]-item_extra+1) {
linebuf_size = (columns[col]-item_extra+1)*2;
linebuf = g_realloc(linebuf, linebuf_size);
}
memset(linebuf, ' ', columns[col]-item_extra);
linebuf[columns[col]-item_extra] = '\0';
memcpy(linebuf, rec->nick, strlen(rec->nick));
aligned_nick = get_alignment(rec->nick,
columns[col]-item_extra,
ALIGN_PAD, ' ');
formatnum = rec->op ? TXT_NAMES_NICK_OP :
rec->halfop ? TXT_NAMES_NICK_HALFOP :
@ -420,8 +417,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
format = format_get_text(MODULE_NAME, NULL,
channel->server,
channel->visible_name,
formatnum, nickmode, linebuf);
formatnum, nickmode, aligned_nick);
g_string_append(str, format);
g_free(aligned_nick);
g_free(format);
if (++col == cols) {
@ -446,7 +444,6 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
g_string_free(str, TRUE);
g_free_not_null(columns);
g_free_not_null(prefix_format);
g_free(linebuf);
}
void fe_channels_nicklist(CHANNEL_REC *channel, int flags)

View File

@ -420,33 +420,17 @@ void format_create_dest_tag(TEXT_DEST_REC *dest, void *server,
window_find_closest(server, target, level);
}
static int advance (char const **str, gboolean utf8)
{
if (utf8) {
gunichar c;
c = g_utf8_get_char(*str);
*str = g_utf8_next_char(*str);
return unichar_isprint(c) ? mk_wcwidth(c) : 1;
} else {
*str += 1;
return 1;
}
}
/* Return length of text part in string (ie. without % codes) */
int format_get_length(const char *str)
{
GString *tmp;
int len;
gboolean utf8;
int utf8;
int adv = 0;
g_return_val_if_fail(str != NULL, 0);
utf8 = is_utf8() && g_utf8_validate(str, -1, NULL);
utf8 = string_policy(str);
tmp = g_string_new(NULL);
len = 0;
@ -465,7 +449,7 @@ int format_get_length(const char *str)
len++;
}
len += advance(&str, utf8);
len += string_advance(&str, utf8);
}
g_string_free(tmp, TRUE);
@ -480,12 +464,12 @@ int format_real_length(const char *str, int len)
GString *tmp;
const char *start;
const char *oldstr;
gboolean utf8;
int utf8;
int adv = 0;
g_return_val_if_fail(str != NULL, 0);
g_return_val_if_fail(len >= 0, 0);
utf8 = is_utf8() && g_utf8_validate(str, -1, NULL);
utf8 = string_policy(str);
start = str;
tmp = g_string_new(NULL);
@ -507,7 +491,7 @@ int format_real_length(const char *str, int len)
}
oldstr = str;
len -= advance(&str, utf8);
len -= string_advance(&str, utf8);
if (len < 0)
str = oldstr;
}

View File

@ -2,7 +2,7 @@
#define MODULE_NAME "fe-common/core"
typedef guint32 unichar;
#include "utf8.h"
typedef struct {
time_t time;
char *nick;

View File

@ -1,26 +0,0 @@
/* utf8.c - Operations on UTF-8 strings.
*
* Copyright (C) 2002 Timo Sirainen
*
* Based on GLib code by
*
* Copyright (C) 1999 Tom Tromey
* Copyright (C) 2000 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "module.h"

View File

@ -1,17 +0,0 @@
#ifndef __UTF8_H
#define __UTF8_H
/* XXX I didn't check the encoding range of big5+. This is standard big5. */
#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */
#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */
#define is_big5_lo(lo) ((is_big5_los(lo) || is_big5_lox(lo)))
#define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE)
#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo))
/* Returns width for character (0-2). */
int mk_wcwidth(unichar c);
#define unichar_isprint(c) (((c) & ~0x80) >= 32)
#define is_utf8_leading(c) (((c) & 0xc0) != 0x80)
#endif

View File

@ -366,22 +366,8 @@ static int scrlen_str(const char *str)
char *stripped;
g_return_val_if_fail(str != NULL, 0);
str = stripped = strip_codes(str);
if (is_utf8() && g_utf8_validate(str, -1, NULL)) {
while (*str != '\0') {
gunichar c;
c = g_utf8_get_char(str);
str = g_utf8_next_char(str);
len += unichar_isprint(c) ? mk_wcwidth(c) : 1;
}
} else {
len = strlen(str);
}
stripped = strip_codes(str);
len = string_width(stripped, -1);
g_free(stripped);
return len;
}

View File

@ -27,7 +27,7 @@ typedef struct _TERM_WINDOW TERM_WINDOW;
#define TERM_TYPE_UTF8 1
#define TERM_TYPE_BIG5 2
typedef guint32 unichar;
#include "utf8.h"
extern TERM_WINDOW *root_window;
extern int term_width, term_height;