mirror of
https://github.com/rkd77/elinks.git
synced 2025-06-30 22:19:29 -04:00
UTF-8: Split UCS_REPLACEMENT_CHARACTER off UCS_NO_CHAR.
In the previous version, invalid UTF-8 from a terminal caused UCS_NO_CHAR (0xFFFFFFFD) to be stored in a term_event_key_T, resulting in -3, which was then incidentally treated as an unassigned special key. Now, invalid UTF-8 is instead mapped to UCS_REPLACEMENT_CHARACTER and treated as a character. The fact that handle_interlink_event calls term_send_ucs when it receives invalid UTF-8 makes it pretty clear that this was the intended behavior. src/viewer/text/link.c (not changed in this commit) already referred to UCS_REPLACEMENT_CHARACTER in a comment even though the macro was not previously defined.
This commit is contained in:
parent
fa1859c0f1
commit
0748ee8c92
@ -491,7 +491,7 @@ cp2u_shared(const struct codepage_desc *from, unsigned char c)
|
|||||||
if (from->table[j].c == c)
|
if (from->table[j].c == c)
|
||||||
return from->table[j].u;
|
return from->table[j].u;
|
||||||
|
|
||||||
return UCS_NO_CHAR;
|
return UCS_REPLACEMENT_CHARACTER;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Slow algorithm, used for converting input from the terminal. */
|
/* Slow algorithm, used for converting input from the terminal. */
|
||||||
@ -503,7 +503,7 @@ cp2u(int from, unsigned char c)
|
|||||||
/* UTF-8 is a multibyte codepage and cannot be handled with
|
/* UTF-8 is a multibyte codepage and cannot be handled with
|
||||||
* this function. */
|
* this function. */
|
||||||
assert(codepages[from].table != table_utf_8);
|
assert(codepages[from].table != table_utf_8);
|
||||||
if_assert_failed return UCS_NO_CHAR;
|
if_assert_failed return UCS_REPLACEMENT_CHARACTER;
|
||||||
|
|
||||||
if (c < 0x80) return c;
|
if (c < 0x80) return c;
|
||||||
else return cp2u_shared(&codepages[from], c);
|
else return cp2u_shared(&codepages[from], c);
|
||||||
|
@ -3,7 +3,17 @@
|
|||||||
|
|
||||||
typedef uint32_t unicode_val_T;
|
typedef uint32_t unicode_val_T;
|
||||||
|
|
||||||
/* UCS/Unicode replacement character. */
|
/* U+FFFD REPLACEMENT CHARACTER. Used when no Unicode mapping is
|
||||||
|
* known for a byte in a codepage, or when invalid UTF-8 is received
|
||||||
|
* from a terminal. After generating the character, ELinks then
|
||||||
|
* treats it like any other Unicode character. The user can also type
|
||||||
|
* this character directly, and it can occur in documents. */
|
||||||
|
#define UCS_REPLACEMENT_CHARACTER ((unicode_val_T) 0xFFFD)
|
||||||
|
|
||||||
|
/* A special value that fits in unicode_val_T but is outside the range
|
||||||
|
* of Unicode characters. utf_8_to_unicode and cp_to_unicode return
|
||||||
|
* this if the input is too short. This is also used as a placeholder
|
||||||
|
* for the second cell of a double-cell character. */
|
||||||
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
|
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
|
||||||
|
|
||||||
/* replacement character. See u2cp(). */
|
/* replacement character. See u2cp(). */
|
||||||
|
@ -338,20 +338,21 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
|
|||||||
unicode_val_T u = interlink->utf_8.ucs;
|
unicode_val_T u = interlink->utf_8.ucs;
|
||||||
|
|
||||||
if (u < interlink->utf_8.min)
|
if (u < interlink->utf_8.min)
|
||||||
u = UCS_NO_CHAR;
|
u = UCS_REPLACEMENT_CHARACTER;
|
||||||
term_send_ucs(term, u,
|
term_send_ucs(term, u,
|
||||||
term->interlink->utf_8.modifier);
|
term->interlink->utf_8.modifier);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* The byte sequence for this character is
|
/* The byte sequence for this character
|
||||||
* ending prematurely. Send UCS_NO_CHAR for the
|
* is ending prematurely. Send
|
||||||
* terminated character, but don't break; let
|
* UCS_REPLACEMENT_CHARACTER for the
|
||||||
* this byte be handled below. */
|
* terminated character, but don't break;
|
||||||
|
* let this byte be handled below. */
|
||||||
|
|
||||||
interlink->utf_8.len = 0;
|
interlink->utf_8.len = 0;
|
||||||
term_send_ucs(term, UCS_NO_CHAR,
|
term_send_ucs(term, UCS_REPLACEMENT_CHARACTER,
|
||||||
term->interlink->utf_8.modifier);
|
term->interlink->utf_8.modifier);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -404,7 +405,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
term_send_ucs(term, UCS_NO_CHAR, modifier);
|
term_send_ucs(term, UCS_REPLACEMENT_CHARACTER, modifier);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user