diff --git a/src/intl/charsets.c b/src/intl/charsets.c index fb8faba3e..11aedf286 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -491,7 +491,7 @@ cp2u_shared(const struct codepage_desc *from, unsigned char c) if (from->table[j].c == c) return from->table[j].u; - return UCS_NO_CHAR; + return UCS_REPLACEMENT_CHARACTER; } /* Slow algorithm, used for converting input from the terminal. */ @@ -503,7 +503,7 @@ cp2u(int from, unsigned char c) /* UTF-8 is a multibyte codepage and cannot be handled with * this function. */ assert(codepages[from].table != table_utf_8); - if_assert_failed return UCS_NO_CHAR; + if_assert_failed return UCS_REPLACEMENT_CHARACTER; if (c < 0x80) return c; else return cp2u_shared(&codepages[from], c); diff --git a/src/intl/charsets.h b/src/intl/charsets.h index 6a28c8ae3..095fe48e3 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -3,7 +3,17 @@ typedef uint32_t unicode_val_T; -/* UCS/Unicode replacement character. */ +/* U+FFFD REPLACEMENT CHARACTER. Used when no Unicode mapping is + * known for a byte in a codepage, or when invalid UTF-8 is received + * from a terminal. After generating the character, ELinks then + * treats it like any other Unicode character. The user can also type + * this character directly, and it can occur in documents. */ +#define UCS_REPLACEMENT_CHARACTER ((unicode_val_T) 0xFFFD) + +/* A special value that fits in unicode_val_T but is outside the range + * of Unicode characters. utf_8_to_unicode and cp_to_unicode return + * this if the input is too short. This is also used as a placeholder + * for the second cell of a double-cell character. */ #define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD) /* &nbsp; replacement character. See u2cp(). 
*/ diff --git a/src/terminal/event.c b/src/terminal/event.c index 0f534e52f..43a6105b3 100644 --- a/src/terminal/event.c +++ b/src/terminal/event.c @@ -338,20 +338,21 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) unicode_val_T u = interlink->utf_8.ucs; if (u < interlink->utf_8.min) - u = UCS_NO_CHAR; + u = UCS_REPLACEMENT_CHARACTER; term_send_ucs(term, u, term->interlink->utf_8.modifier); } break; } else { - /* The byte sequence for this character is - * ending prematurely. Send UCS_NO_CHAR for the - * terminated character, but don't break; let - * this byte be handled below. */ + /* The byte sequence for this character + * is ending prematurely. Send + * UCS_REPLACEMENT_CHARACTER for the + * terminated character, but don't break; + * let this byte be handled below. */ interlink->utf_8.len = 0; - term_send_ucs(term, UCS_NO_CHAR, + term_send_ucs(term, UCS_REPLACEMENT_CHARACTER, term->interlink->utf_8.modifier); } } @@ -404,7 +405,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) break; } - term_send_ucs(term, UCS_NO_CHAR, modifier); + term_send_ucs(term, UCS_REPLACEMENT_CHARACTER, modifier); break; }