From 2e860abd2b8b3a30f74005450830c34a318b64a3 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 2 Oct 2015 14:04:04 +0200 Subject: [PATCH 1/5] Fix the display of utf8 sequences in the gui term_addstr() had a long-standing fixme that suggested it didn't take into account the string encoding when calculating the string length. The BIG5 code path is untested. --- src/fe-common/core/utf8.h | 2 ++ src/fe-text/gui-printtext.c | 7 +++-- src/fe-text/term-terminfo.c | 53 ++++++++++++++++++++++++++++++++++--- src/fe-text/term.h | 2 +- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/src/fe-common/core/utf8.h b/src/fe-common/core/utf8.h index 3c15dc7d..70b44d7e 100644 --- a/src/fe-common/core/utf8.h +++ b/src/fe-common/core/utf8.h @@ -8,6 +8,8 @@ #define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE) #define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo)) +int strlen_big5(const unsigned char *str); + /* Returns width for character (0-2). */ int mk_wcwidth(unichar c); diff --git a/src/fe-text/gui-printtext.c b/src/fe-text/gui-printtext.c index 547d39c9..d8272df5 100644 --- a/src/fe-text/gui-printtext.c +++ b/src/fe-text/gui-printtext.c @@ -220,16 +220,15 @@ static void sig_gui_print_text(WINDOW_REC *window, void *fgcolor, get_colors(flags, &fg, &bg, &attr); if (window == NULL) { - g_return_if_fail(next_xpos != -1); + g_return_if_fail(next_xpos != -1); term_set_color2(root_window, attr, fg, bg); term_move(root_window, next_xpos, next_ypos); if (flags & GUI_PRINT_FLAG_CLRTOEOL) term_clrtoeol(root_window); - term_addstr(root_window, str); - next_xpos += strlen(str); /* FIXME utf8 or big5 */ - return; + next_xpos += term_addstr(root_window, str); + return; } lineinfo.level = dest == NULL ? 
0 : dest->level; diff --git a/src/fe-text/term-terminfo.c b/src/fe-text/term-terminfo.c index ded79c28..8c95bc0d 100644 --- a/src/fe-text/term-terminfo.c +++ b/src/fe-text/term-terminfo.c @@ -522,15 +522,60 @@ void term_add_unichar(TERM_WINDOW *window, unichar chr) } } -void term_addstr(TERM_WINDOW *window, const char *str) +int term_addstr(TERM_WINDOW *window, const char *str) { - int len; + int i, len, raw_len; + unichar *tmp; + const char *ptr; if (vcmove) term_move_real(); - len = strlen(str); /* FIXME utf8 or big5 */ + + raw_len = strlen(str); + + /* The string length depends on the terminal encoding */ + switch (term_type) { + case TERM_TYPE_BIG5: + len = strlen_big5((const unsigned char *)str); + break; + case TERM_TYPE_UTF8: + len = g_utf8_strlen(str, -1); + break; + default: + len = strlen(str); + break; + } + + tmp = calloc(len, sizeof(unichar)); + if (tmp == NULL) + return 0; + + switch (term_type) { + case TERM_TYPE_BIG5: + big5_to_unichars(str, tmp); + break; + case TERM_TYPE_UTF8: + ptr = str; + for (i = 0; i < len; i++) { + tmp[i] = g_utf8_get_char(ptr); + ptr = g_utf8_next_char(ptr); + } + break; + default: + for (i = 0; i < len; i++) + tmp[i] = str[i]; + } + + for (len = i = 0; i < len; i++) + len += unichar_isprint(tmp[i]) ? 
mk_wcwidth(tmp[i]) : 1; + + free(tmp); + term_printed_text(len); - fwrite(str, 1, len, window->term->out); + /* Use strlen() here since we need the number of raw bytes */ + fwrite(str, 1, raw_len, window->term->out); + + return len; } void term_clrtoeol(TERM_WINDOW *window) diff --git a/src/fe-text/term.h b/src/fe-text/term.h index cdcc787a..f0a76c42 100644 --- a/src/fe-text/term.h +++ b/src/fe-text/term.h @@ -83,7 +83,7 @@ void term_set_color(TERM_WINDOW *window, int col); void term_move(TERM_WINDOW *window, int x, int y); void term_addch(TERM_WINDOW *window, char chr); void term_add_unichar(TERM_WINDOW *window, unichar chr); -void term_addstr(TERM_WINDOW *window, const char *str); +int term_addstr(TERM_WINDOW *window, const char *str); void term_clrtoeol(TERM_WINDOW *window); void term_move_cursor(int x, int y); From c351c448b8dd2b9759e46c0c6e73ed5ead936ffc Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 2 Oct 2015 15:02:43 +0200 Subject: [PATCH 2/5] Rework the logic to avoid allocating memory --- src/fe-text/term-terminfo.c | 52 ++++++++++++++----------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/src/fe-text/term-terminfo.c b/src/fe-text/term-terminfo.c index 8c95bc0d..2ee69a1c 100644 --- a/src/fe-text/term-terminfo.c +++ b/src/fe-text/term-terminfo.c @@ -524,52 +524,38 @@ void term_add_unichar(TERM_WINDOW *window, unichar chr) int term_addstr(TERM_WINDOW *window, const char *str) { - int i, len, raw_len; - unichar *tmp; + int len, raw_len; + unichar tmp; const char *ptr; if (vcmove) term_move_real(); + len = 0; raw_len = strlen(str); /* The string length depends on the terminal encoding */ - switch (term_type) { - case TERM_TYPE_BIG5: - len = strlen_big5((const unsigned char *)str); - break; - case TERM_TYPE_UTF8: - len = g_utf8_strlen(str, -1); - break; - default: - len = strlen(str); - break; - } - tmp = calloc(len, sizeof(unichar)); - if (tmp == NULL) - return 0; + ptr = str; - switch (term_type) { - case TERM_TYPE_BIG5: - 
big5_to_unichars(str, tmp); - break; - case TERM_TYPE_UTF8: - ptr = str; - for (i = 0; i < len; i++) { - tmp[i] = g_utf8_get_char(ptr); + if (term_type != TERM_TYPE_BIG5) { + while (*ptr != '\0') { + tmp = g_utf8_get_char(ptr); + len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1; ptr = g_utf8_next_char(ptr); } - break; - default: - for (i = 0; i < len; i++) - tmp[i] = str[i]; + } else { + while (*ptr != '\0') { + if (is_big5(ptr[0], ptr[1])) { + tmp = ptr[0] << 8 | ptr[1]; + ptr += 2; + } else { + tmp = *ptr; + ptr += 1; + } + len += (tmp > 0xff) ? 2 : 1; + } } - for (len = i = 0; i < len; i++) - len += unichar_isprint(tmp[i]) ? mk_wcwidth(tmp[i]) : 1; - - free(tmp); - term_printed_text(len); /* Use strlen() here since we need the number of raw bytes */ From c7646dc58d1e70abc8d2981e08baa83e459affdb Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 2 Oct 2015 15:07:59 +0200 Subject: [PATCH 3/5] Even simpler logic --- src/fe-text/term-terminfo.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/fe-text/term-terminfo.c b/src/fe-text/term-terminfo.c index 2ee69a1c..6d289cfc 100644 --- a/src/fe-text/term-terminfo.c +++ b/src/fe-text/term-terminfo.c @@ -543,18 +543,8 @@ int term_addstr(TERM_WINDOW *window, const char *str) len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1; ptr = g_utf8_next_char(ptr); } - } else { - while (*ptr != '\0') { - if (is_big5(ptr[0], ptr[1])) { - tmp = ptr[0] << 8 | ptr[1]; - ptr += 2; - } else { - tmp = *ptr; - ptr += 1; - } - len += (tmp > 0xff) ? 
2 : 1; - } - } + } else + len = raw_len; term_printed_text(len); From 48ab298a67151e6fce33c1d7b34f4f83796b8d9a Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 2 Oct 2015 15:08:48 +0200 Subject: [PATCH 4/5] Kill an unneeded declaration --- src/fe-common/core/utf8.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fe-common/core/utf8.h b/src/fe-common/core/utf8.h index 70b44d7e..3c15dc7d 100644 --- a/src/fe-common/core/utf8.h +++ b/src/fe-common/core/utf8.h @@ -8,8 +8,6 @@ #define is_big5_hi(hi) (0x81 <= (hi) && (hi) <= 0xFE) #define is_big5(hi,lo) (is_big5_hi(hi) && is_big5_lo(lo)) -int strlen_big5(const unsigned char *str); - /* Returns width for character (0-2). */ int mk_wcwidth(unichar c); From 0140e7c6b23ff1490965b080a7882b2508677d55 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sun, 4 Oct 2015 11:56:54 +0200 Subject: [PATCH 5/5] Fix the indentation. Also take the UTF-8 width path only when term_type is TERM_TYPE_UTF8; previously it was taken for every terminal type except TERM_TYPE_BIG5. --- src/fe-text/term-terminfo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/fe-text/term-terminfo.c b/src/fe-text/term-terminfo.c index 6d289cfc..27be904e 100644 --- a/src/fe-text/term-terminfo.c +++ b/src/fe-text/term-terminfo.c @@ -537,14 +537,14 @@ int term_addstr(TERM_WINDOW *window, const char *str) ptr = str; - if (term_type != TERM_TYPE_BIG5) { - while (*ptr != '\0') { - tmp = g_utf8_get_char(ptr); - len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1; - ptr = g_utf8_next_char(ptr); - } + if (term_type == TERM_TYPE_UTF8) { + while (*ptr != '\0') { + tmp = g_utf8_get_char(ptr); + len += unichar_isprint(tmp) ? mk_wcwidth(tmp) : 1; + ptr = g_utf8_next_char(ptr); + } } else - len = raw_len; + len = raw_len; term_printed_text(len);