diff --git a/src/intl/charsets.h b/src/intl/charsets.h index e6dbe707d..69fb3cb96 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -3,6 +3,10 @@ typedef uint32_t unicode_val_T; +/* U+0020 SPACE. Normally the same as ' ' or L' ' but perhaps ELinks + * shouldn't rely on that. */ +#define UCS_SPACE ((unicode_val_T) 0x0020) + /* U+FFFD REPLACEMENT CHARACTER. Used when no Unicode mapping is * known for a byte in a codepage, or when invalid UTF-8 is received * from a terminal. After generating the character, ELinks then diff --git a/src/terminal/screen.c b/src/terminal/screen.c index 371f55a6c..f0f096ff6 100644 --- a/src/terminal/screen.c +++ b/src/terminal/screen.c @@ -542,13 +542,10 @@ add_char_data(struct string *screen, struct screen_driver *driver, * defined 0 1 enum border_char border unibyte * defined 1 0 UTF-32 UTF-8 * defined 1 1 enum border_char border unibyte + * + * For "UTF-32" above, the data can also be UCS_NO_CHAR. */ - if (!isscreensafe(data)) { - add_char_to_string(screen, ' '); - return; - } - if (border && driver->frame && data >= 176 && data < 224) data = driver->frame[data - 176]; @@ -556,18 +553,25 @@ add_char_data(struct string *screen, struct screen_driver *driver, #ifdef CONFIG_UTF8 if (border) add_char_to_string(screen, (unsigned char)data); - else - if (data != UCS_NO_CHAR) - add_to_string(screen, encode_utf8(data)); + else if (data != UCS_NO_CHAR) { + if (!isscreensafe_ucs(data)) + data = UCS_SPACE; + add_to_string(screen, encode_utf8(data)); + } #else int charset = driver->charsets[!!border]; - add_to_string(screen, cp2utf8(charset, data)); + if (border || isscreensafe(data)) + add_to_string(screen, cp2utf8(charset, data)); + else /* UCS_SPACE <= 0x7F and so fits in one UTF-8 byte */ + add_char_to_string(screen, UCS_SPACE); #endif /* CONFIG_UTF8 */ - return; + } else { + if (border || isscreensafe(data)) + add_char_to_string(screen, (unsigned char)data); + else + add_char_to_string(screen, ' '); } - - add_char_to_string(screen, (unsigned char)data); } /* Time critical section. */ diff --git a/src/util/string.h b/src/util/string.h index 4396fdaf7..a66d7479c 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -114,6 +114,9 @@ int elinks_strlcasecmp(const unsigned char *s1, size_t n1, * non-ISO-8859 charsets. */ #define isscreensafe(c) ((c) >= ' ' && (c) != ASCII_DEL) +/* Like isscreensafe but takes Unicode values and so can check for C1. */ +#define isscreensafe_ucs(c) (((c) >= 0x20 && (c) <= 0x7E) || (c) >= 0xA0) + /* String debugging using magic number, it may catch some errors. */ #ifdef CONFIG_DEBUG diff --git a/src/viewer/dump/dump.c b/src/viewer/dump/dump.c index ab3c170d3..63b2bf1f5 100644 --- a/src/viewer/dump/dump.c +++ b/src/viewer/dump/dump.c @@ -405,7 +405,7 @@ utf8: unicode_val_T data = pos->data; unsigned int frame = (pos->attr & SCREEN_ATTR_FRAME); - if (!isscreensafe(data)) { + if (!isscreensafe_ucs(data)) { white++; continue; } else {