Merge pull request #480 from xavierog/handle-utf8-nicks-with-mk_wcwidth

Handle utf8 nicks with mk_wcwidth()
2024-11-03 04:27:19 -05:00 · 2016-05-18 09:18:21 +02:00 · 2016-05-18 09:18:21 +02:00 · 74d38683bf
commit 74d38683bf
parent 5564f85041 5d69b4c4a7
14 changed files with 260 additions and 139 deletions
--- a/src/core/Makefile.am
+++ b/src/core/Makefile.am
@ -44,6 +44,7 @@ libcore_a_SOURCES = \
 	settings.c \
 	signals.c \
 	special-vars.c \
+	utf8.c \
 	write-buffer.c

 structure_headers = \
--- a/src/core/special-vars.c
+++ b/src/core/special-vars.c
@ -25,10 +25,7 @@
 #include "settings.h"
 #include "servers.h"
 #include "misc.h"
-
-#define ALIGN_RIGHT 0x01
-#define ALIGN_CUT   0x02
-#define ALIGN_PAD   0x04
+#include "utf8.h"

 #define isvarchar(c) \
        (i_isalnum(c) || (c) == '_')
@ -316,22 +313,28 @@ static int get_alignment_args(char **data, int *align, int *flags, char *pad)
 }

 /* return the aligned text */
-static char *get_alignment(const char *text, int align, int flags, char pad)
+char *get_alignment(const char *text, int align, int flags, char pad)
 {
 	GString *str;
 	char *ret;
+	int policy;
+	unsigned int cut_bytes;

 	g_return_val_if_fail(text != NULL, NULL);

+	policy = string_policy(text);
+
 	str = g_string_new(text);

 	/* cut */
-	if ((flags & ALIGN_CUT) && align > 0 && str->len > align)
-		g_string_truncate(str, align);
+	if ((flags & ALIGN_CUT) && align > 0 && string_width(text, policy) > align) {
+		string_chars_for_width(text, policy, align, &cut_bytes);
+		g_string_truncate(str, cut_bytes);
+	}

 	/* add pad characters */
 	if (flags & ALIGN_PAD) {
-		while (str->len < align) {
+		while (string_width(str->str, policy) < align) {
 			if (flags & ALIGN_RIGHT)
 				g_string_prepend_c(str, pad);
 			else
--- a/src/core/special-vars.h
+++ b/src/core/special-vars.h
@ -9,9 +9,16 @@
 #define PARSE_FLAG_ESCAPE_THEME 0x08 /* if any arguments/variables contain { or } chars, escape them with % */
 #define PARSE_FLAG_ONLY_ARGS	0x10 /* expand only arguments ($0 $1 etc.) but no other $variables */

+#define ALIGN_RIGHT 0x01
+#define ALIGN_CUT   0x02
+#define ALIGN_PAD   0x04
+
 typedef char* (*SPECIAL_HISTORY_FUNC)
 	(const char *text, void *item, int *free_ret);

+/* Cut (ALIGN_CUT) and/or pad (ALIGN_PAD) text to "align" screen columns; g_free() the result */
+char *get_alignment(const char *text, int align, int flags, char pad);
+
 /* Parse and expand text after '$' character. return value has to be
   g_free()'d if `free_ret' is TRUE. */
 char *parse_special(char **cmd, SERVER_REC *server, void *item,
--- a/src/core/utf8.c
+++ b/src/core/utf8.c
@ -0,0 +1,135 @@
+/* utf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 2002 Timo Sirainen
+ *
+ * Based on GLib code by
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "utf8.h"
+#include "module.h"
+#include "wcwidth.c"
+/* Provide is_utf8(): */
+#include "recode.h"
+
+int string_advance(char const **str, int policy)
+{	/* Move *str past one character and return that character's width. */
+	if (policy == TREAT_STRING_AS_UTF8) {
+		gunichar c;
+
+		c = g_utf8_get_char(*str);
+		*str = g_utf8_next_char(*str);
+
+		return unichar_isprint(c) ? mk_wcwidth(c) : 1; /* fails unichar_isprint(): count 1 */
+	} else {
+		/* Any other policy is handled as TREAT_STRING_AS_BYTES: one byte, width 1. */
+		*str += 1;
+
+		return 1;
+	}
+}
+
+int string_policy(const char *str)
+{	/* Choose between TREAT_STRING_AS_UTF8 and TREAT_STRING_AS_BYTES for str. */
+	if (is_utf8()) {
+		if (str == NULL || g_utf8_validate(str, -1, NULL)) {
+			/* No string provided or valid UTF-8 string: treat as UTF-8: */
+			return TREAT_STRING_AS_UTF8;
+		}
+	}
+	return TREAT_STRING_AS_BYTES; /* is_utf8() is false, or str failed validation */
+}
+
+int string_length(const char *str, int policy)
+{	/* Length of str: UTF-8 characters under TREAT_STRING_AS_UTF8, bytes otherwise. */
+	g_return_val_if_fail(str != NULL, 0); /* NULL str: return 0 */
+
+	if (policy == -1) { /* -1: derive the policy from the string itself */
+		policy = string_policy(str);
+	}
+
+	if (policy == TREAT_STRING_AS_UTF8) {
+		return g_utf8_strlen(str, -1);
+	}
+	else {
+		/* Any other policy is handled as TREAT_STRING_AS_BYTES: */
+		return strlen(str);
+	}
+}
+
+int string_width(const char *str, int policy)
+{	/* Sum of the widths string_advance() reports for every character of str. */
+	int len;
+
+	g_return_val_if_fail(str != NULL, 0); /* NULL str: return 0 */
+
+	if (policy == -1) { /* -1: derive the policy from the string itself */
+		policy = string_policy(str);
+	}
+
+	len = 0;
+	while (*str != '\0') { /* string_advance() moves str forward on each pass */
+		len += string_advance(&str, policy);
+	}
+	return len;
+}
+
+int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes)
+{	/* Count the leading characters of str whose combined width does not exceed n. */
+	const char *c, *previous_c;
+	int str_width, char_width, char_count;
+
+	g_return_val_if_fail(str != NULL, -1); /* NULL str: return -1, *bytes left untouched */
+
+	/* Handle the trivial case where n is 0: no character is counted. */
+	if (n == 0) {
+		if (bytes != NULL) {
+			*bytes = 0;
+		}
+		return 0;
+	}
+
+	if (policy == -1) { /* -1: derive the policy from the string itself */
+		policy = string_policy(str);
+	}
+
+	/* Iterate over characters until the next one would exceed n: */
+	char_count = 0;
+	str_width = 0;
+	c = str;
+	while (*c != '\0') {
+		previous_c = c;
+		char_width = string_advance(&c, policy);
+		if (str_width + char_width > n) { /* NOTE(review): int sum compared against unsigned n; a negative width from string_advance() would convert to a huge value - confirm it cannot happen */
+			/* We stepped beyond n, get one step back and stop there: */
+			c = previous_c;
+			break;
+		}
+		++ char_count;
+		str_width += char_width;
+	}
+	/* At this point, we know that char_count characters reach str_width
+	 * columns, which is less than or equal to n. */
+
+	/* Optionally provide the equivalent number of bytes: */
+	if (bytes != NULL) {
+		*bytes = c - str; /* byte length of the counted prefix */
+	}
+	return char_count;
+}
--- a/src/core/utf8.h
+++ b/src/core/utf8.h
@ -0,0 +1,56 @@
+#ifndef __UTF8_H
+#define __UTF8_H
+
+/* XXX I didn't check the encoding range of big5+. This is standard big5. */
+#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */
+#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */
+#define is_big5_lo(lo)	((is_big5_los(lo) || is_big5_lox(lo)))
+#define is_big5_hi(hi)  (0x81 <= (hi) && (hi) <= 0xFE)
+#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo))
+
+#include <glib.h>
+typedef guint32 unichar;
+
+/* Returns width for character (0-2). */
+int mk_wcwidth(unichar c);
+
+/* Advance the str pointer one character further; return the number of columns
+ * occupied by the skipped character.
+ */
+int string_advance(char const **str, int policy);
+
+/* TREAT_STRING_AS_BYTES means strings are to be treated using strncpy,
+ * strnlen, etc.
+ * TREAT_STRING_AS_UTF8 means strings are to be treated using g_utf8_*
+ * functions.
+ */
+enum str_policy {
+	TREAT_STRING_AS_BYTES,
+	TREAT_STRING_AS_UTF8
+};
+
+/* Return how the str string ought to be treated: TREAT_STRING_AS_UTF8 if the
+ * terminal handles UTF-8 and if the string appears to be a valid UTF-8 string;
+ * TREAT_STRING_AS_BYTES otherwise.
+ */
+int string_policy(const char *str);
+
+/* Return the length of the str string according to the given policy (UTF-8
+ * characters or bytes); if policy is -1, this function will call string_policy().
+ */
+int string_length(const char *str, int policy);
+/* Return the screen width of the str string according to the given policy; if
+ * policy is -1, this function will call string_policy().
+ */
+int string_width(const char *str, int policy);
+
+/* Return the number of characters from str that fit within n columns, or -1 if
+ * str is NULL. If bytes is not NULL, also store the equivalent number of bytes.
+ * If policy is -1, this function will call string_policy().
+ */
+int string_chars_for_width(const char *str, int policy, unsigned int n, unsigned int *bytes);
+
+#define unichar_isprint(c) (((c) & ~0x80) >= 32)
+#define is_utf8_leading(c) (((c) & 0xc0) != 0x80)
+
+#endif
--- a/src/fe-common/core/wcwidth.c
+++ b/src/fe-common/core/wcwidth.c
--- a/src/fe-common/core/Makefile.am
+++ b/src/fe-common/core/Makefile.am
@ -24,8 +24,6 @@ libfe_common_core_a_SOURCES = \
 	fe-queries.c \
 	fe-server.c \
 	fe-settings.c \
-	utf8.c \
-	wcwidth.c \
 	formats.c \
 	hilight-text.c \
 	keyboard.c \
@ -62,6 +60,3 @@ pkginc_fe_common_core_HEADERS = \
 	window-items.h \
 	windows-layout.h \
 	fe-windows.h
-
-noinst_HEADERS = \
-	utf8.h
--- a/src/fe-common/core/fe-channels.c
+++ b/src/fe-common/core/fe-channels.c
@ -26,6 +26,8 @@
 #include "levels.h"
 #include "misc.h"
 #include "settings.h"
+#include "special-vars.h"
+#include "utf8.h"

 #include "chat-protocols.h"
 #include "chatnets.h"
@ -323,7 +325,7 @@ static void cmd_channel_remove(const char *data)

 static int get_nick_length(void *data)
 {
-        return strlen(((NICK_REC *) data)->nick);
+        return string_width(((NICK_REC *) data)->nick, -1);
 }

 static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
@ -333,9 +335,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
 	GString *str;
 	GSList *tmp;
 	char *format, *stripped, *prefix_format;
-	char *linebuf, nickmode[2] = { 0, 0 };
+	char *aligned_nick, nickmode[2] = { 0, 0 };
 	int *columns, cols, rows, last_col_rows, col, row, max_width;
-        int item_extra, linebuf_size, formatnum;
+	int item_extra, formatnum;

 	window = window_find_closest(channel->server, channel->visible_name,
 	                             MSGLEVEL_CLIENTCRAP);
@ -394,7 +396,6 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
 		last_col_rows = rows;

 	str = g_string_new(prefix_format);
-	linebuf_size = max_width+1; linebuf = g_malloc(linebuf_size);

 	col = 0; row = 0;
 	for (tmp = nicklist; tmp != NULL; tmp = tmp->next) {
@ -405,13 +406,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
 		else
 			nickmode[0] = ' ';

-		if (linebuf_size < columns[col]-item_extra+1) {
-			linebuf_size = (columns[col]-item_extra+1)*2;
-                        linebuf = g_realloc(linebuf, linebuf_size);
-		}
-		memset(linebuf, ' ', columns[col]-item_extra);
-		linebuf[columns[col]-item_extra] = '\0';
-		memcpy(linebuf, rec->nick, strlen(rec->nick));
+		aligned_nick = get_alignment(rec->nick,
+		                             columns[col]-item_extra,
+		                             ALIGN_PAD, ' ');

 		formatnum = rec->op     ? TXT_NAMES_NICK_OP :
 		            rec->halfop ? TXT_NAMES_NICK_HALFOP :
@ -420,8 +417,9 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
 		format = format_get_text(MODULE_NAME, NULL,
 		                         channel->server,
 		                         channel->visible_name,
-					 formatnum, nickmode, linebuf);
+		                         formatnum, nickmode, aligned_nick);
 		g_string_append(str, format);
+		g_free(aligned_nick);
 		g_free(format);

 		if (++col == cols) {
@ -446,7 +444,6 @@ static void display_sorted_nicks(CHANNEL_REC *channel, GSList *nicklist)
 	g_string_free(str, TRUE);
 	g_free_not_null(columns);
 	g_free_not_null(prefix_format);
-	g_free(linebuf);
 }

 void fe_channels_nicklist(CHANNEL_REC *channel, int flags)
--- a/src/fe-common/core/formats.c
+++ b/src/fe-common/core/formats.c
@ -420,33 +420,17 @@ void format_create_dest_tag(TEXT_DEST_REC *dest, void *server,
 		window_find_closest(server, target, level);
 }

-static int advance (char const **str, gboolean utf8)
-{
-	if (utf8) {
-		gunichar c;
-
-		c = g_utf8_get_char(*str);
-		*str = g_utf8_next_char(*str);
-
-		return unichar_isprint(c) ? mk_wcwidth(c) : 1;
-	} else {
-		*str += 1;
-
-		return 1;
-	}
-}
-
 /* Return length of text part in string (ie. without % codes) */
 int format_get_length(const char *str)
 {
 	GString *tmp;
 	int len;
-	gboolean utf8;
+	int utf8;
 	int adv = 0;

 	g_return_val_if_fail(str != NULL, 0);

-	utf8 = is_utf8() && g_utf8_validate(str, -1, NULL);
+	utf8 = string_policy(str);

 	tmp = g_string_new(NULL);
 	len = 0;
@ -465,7 +449,7 @@ int format_get_length(const char *str)
 				len++;
 		}

-		len += advance(&str, utf8);
+		len += string_advance(&str, utf8);
 	}

 	g_string_free(tmp, TRUE);
@ -480,12 +464,12 @@ int format_real_length(const char *str, int len)
 	GString *tmp;
 	const char *start;
 	const char *oldstr;
-	gboolean utf8;
+	int utf8;
 	int adv = 0;
 	g_return_val_if_fail(str != NULL, 0);
 	g_return_val_if_fail(len >= 0, 0);

-	utf8 = is_utf8() && g_utf8_validate(str, -1, NULL);
+	utf8 = string_policy(str);

 	start = str;
 	tmp = g_string_new(NULL);
@ -507,7 +491,7 @@ int format_real_length(const char *str, int len)
 		}

 		oldstr = str;
-		len -= advance(&str, utf8);
+		len -= string_advance(&str, utf8);
 		if (len < 0)
 			str = oldstr;
 	}
--- a/src/fe-common/core/module.h
+++ b/src/fe-common/core/module.h
@ -2,7 +2,7 @@

 #define MODULE_NAME "fe-common/core"

-typedef guint32 unichar;
+#include "utf8.h"
 typedef struct {
 	time_t time;
 	char *nick;
--- a/src/fe-common/core/utf8.c
+++ b/src/fe-common/core/utf8.c
@ -1,26 +0,0 @@
-/* utf8.c - Operations on UTF-8 strings.
- *
- * Copyright (C) 2002 Timo Sirainen
- *
- * Based on GLib code by
- *
- * Copyright (C) 1999 Tom Tromey
- * Copyright (C) 2000 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "module.h"
-
--- a/src/fe-common/core/utf8.h
+++ b/src/fe-common/core/utf8.h
@ -1,17 +0,0 @@
-#ifndef __UTF8_H
-#define __UTF8_H
-
-/* XXX I didn't check the encoding range of big5+. This is standard big5. */
-#define is_big5_los(lo) (0x40 <= (lo) && (lo) <= 0x7E) /* standard */
-#define is_big5_lox(lo) (0x80 <= (lo) && (lo) <= 0xFE) /* extended */
-#define is_big5_lo(lo)	((is_big5_los(lo) || is_big5_lox(lo)))
-#define is_big5_hi(hi)  (0x81 <= (hi) && (hi) <= 0xFE)
-#define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo))
-
-/* Returns width for character (0-2). */
-int mk_wcwidth(unichar c);
-
-#define unichar_isprint(c) (((c) & ~0x80) >= 32)
-#define is_utf8_leading(c) (((c) & 0xc0) != 0x80)
-
-#endif
--- a/src/fe-text/gui-entry.c
+++ b/src/fe-text/gui-entry.c
@ -366,22 +366,8 @@ static int scrlen_str(const char *str)
 	char *stripped;
 	g_return_val_if_fail(str != NULL, 0);

-	str = stripped = strip_codes(str);
-	if (is_utf8() && g_utf8_validate(str, -1, NULL)) {
-
-		while (*str != '\0') {
-			gunichar c;
-
-			c = g_utf8_get_char(str);
-			str = g_utf8_next_char(str);
-
-			len += unichar_isprint(c) ? mk_wcwidth(c) : 1;
-		}
-
-	} else {
-		len = strlen(str);
-	}
-
+	stripped = strip_codes(str);
+	len = string_width(stripped, -1);
 	g_free(stripped);
 	return len;
 }
--- a/src/fe-text/term.h
+++ b/src/fe-text/term.h
@ -27,7 +27,7 @@ typedef struct _TERM_WINDOW TERM_WINDOW;
 #define TERM_TYPE_UTF8		1
 #define TERM_TYPE_BIG5		2

-typedef guint32 unichar;
+#include "utf8.h"

 extern TERM_WINDOW *root_window;
 extern int term_width, term_height;