mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
Define and use macros for handling UTF-16 surrogates.
This commit is contained in:
parent
b9d66bd9bd
commit
38fe5b72f7
@ -111,9 +111,9 @@ append_unicode_to_SEE_string(struct SEE_interpreter *interp,
|
||||
/* TODO: Should this reject code points in the
|
||||
* surrogate range? */
|
||||
SEE_string_addch(str, u);
|
||||
} else if (u <= 0x10FFFF) {
|
||||
SEE_string_addch(str, 0xD800 + ((u - 0x10000) >> 10));
|
||||
SEE_string_addch(str, 0xDC00 + (u & 0x3FF));
|
||||
} else if (needs_utf16_surrogates(u)) {
|
||||
SEE_string_addch(str, get_utf16_high_surrogate(u));
|
||||
SEE_string_addch(str, get_utf16_low_surrogate(u));
|
||||
} else {
|
||||
/* str->interpreter exists but is not documented, so don't
|
||||
* use it; use a separate @interp parameter instead.
|
||||
@ -131,14 +131,12 @@ SEE_string_to_unicode(struct SEE_interpreter *interp, struct SEE_string *S)
|
||||
if (S->length < 1) {
|
||||
SEE_error_throw(interp, interp->Error,
|
||||
"String is empty");
|
||||
} else if (S->data[0] < 0xD800 || S->data[0] > 0xDFFF) {
|
||||
} else if (!is_utf16_surrogate(S->data[0])) {
|
||||
return S->data[0];
|
||||
} else if (S->length >= 2
|
||||
&& S->data[0] >= 0xD800 && S->data[0] <= 0xDBFF
|
||||
&& S->data[1] >= 0xDC00 && S->data[1] <= 0xDFFF) {
|
||||
return 0x10000
|
||||
+ ((S->data[0] & 0x3FF) << 10)
|
||||
+ (S->data[1] & 0x3FF);
|
||||
&& is_utf16_high_surrogate(S->data[0])
|
||||
&& is_utf16_low_surrogate(S->data[1])) {
|
||||
return join_utf16_surrogates(S->data[0], S->data[1]);
|
||||
} else {
|
||||
SEE_error_throw(interp, interp->Error,
|
||||
"Invalid UTF-16 sequence");
|
||||
|
@ -998,9 +998,9 @@ unicode_to_jsstring(JSContext *ctx, unicode_val_T u)
|
||||
* surrogate range? */
|
||||
buf[0] = u;
|
||||
return JS_NewUCStringCopyN(ctx, buf, 1);
|
||||
} else if (u <= 0x10FFFF) {
|
||||
buf[0] = 0xD800 + ((u - 0x10000) >> 10);
|
||||
buf[1] = 0xDC00 + (u & 0x3FF);
|
||||
} else if (needs_utf16_surrogates(u)) {
|
||||
buf[0] = get_utf16_high_surrogate(u);
|
||||
buf[1] = get_utf16_low_surrogate(u);
|
||||
return JS_NewUCStringCopyN(ctx, buf, 2);
|
||||
} else {
|
||||
return NULL;
|
||||
@ -1024,12 +1024,12 @@ jsval_to_accesskey(JSContext *ctx, jsval *vp)
|
||||
/* This implementation ignores extra characters in the string. */
|
||||
if (len < 1)
|
||||
return 0; /* which means no access key */
|
||||
if (chr[0] < 0xD800 || chr[0] > 0xDFFF)
|
||||
if (!is_utf16_surrogate(chr[0]))
|
||||
return chr[0];
|
||||
if (len >= 2
|
||||
&& chr[0] >= 0xD800 && chr[0] <= 0xDBFF
|
||||
&& chr[1] >= 0xDC00 && chr[1] <= 0xDFFF)
|
||||
return 0x10000 + ((chr[0] & 0x3FF) << 10) + (chr[1] & 0x3FF);
|
||||
&& is_utf16_high_surrogate(chr[0])
|
||||
&& is_utf16_low_surrogate(chr[1]))
|
||||
return join_utf16_surrogates(chr[0], chr[1]);
|
||||
JS_ReportError(ctx, "Invalid UTF-16 sequence");
|
||||
return UCS_NO_CHAR; /* which the caller will reject */
|
||||
}
|
||||
|
@ -88,4 +88,16 @@ unsigned char *u2cp_(unicode_val_T, int, int no_nbsp_hack);
|
||||
void init_charsets_lookup(void);
|
||||
void free_charsets_lookup(void);
|
||||
|
||||
/* UTF-16 encodes each Unicode character U+0000...U+FFFF as a single
|
||||
* 16-bit code unit, and each character U+10000...U+10FFFF as a pair
|
||||
* of two code units: a high surrogate followed by a low surrogate.
|
||||
* The range U+D800...U+DFFF is reserved for these surrogates. */
|
||||
/* Nonzero if the code unit or code point @u is any UTF-16 surrogate,
 * i.e. all bits above the low 11 match those of 0xD800 (the range
 * 0xD800...0xDFFF).  @u is evaluated exactly once. */
#define is_utf16_surrogate(u) (0xD800 == (0xFFFFF800 & (u)))
|
||||
/* Nonzero if @u is a high (leading) surrogate: all bits above the
 * low 10 match those of 0xD800 (the range 0xD800...0xDBFF).
 * @u is evaluated exactly once. */
#define is_utf16_high_surrogate(u) (0xD800 == (0xFFFFFC00 & (u)))
|
||||
/* Nonzero if @u is a low (trailing) surrogate: all bits above the
 * low 10 match those of 0xDC00 (the range 0xDC00...0xDFFF).
 * @u is evaluated exactly once. */
#define is_utf16_low_surrogate(u) (0xDC00 == (0xFFFFFC00 & (u)))
|
||||
/* Combine a surrogate pair into the code point it encodes: the offset
 * of @high from 0xD800 supplies the upper bits (shifted left by 10),
 * the offset of @low from 0xDC00 supplies the lower bits, and 0x10000
 * is added on top.  The macro does not validate its arguments.  The
 * long constant makes the shift happen in at least long width. */
#define join_utf16_surrogates(high,low) ((((high) - 0xD800L) << 10) + 0x10000 + ((low) - 0xDC00))
|
||||
/* Nonzero if @u must be encoded as a surrogate pair, i.e. lies in
 * 0x10000...0x10FFFF.  Converting the difference to uint32_t makes
 * values below 0x10000 wrap around to large numbers, so a single
 * comparison covers both ends of the range. */
#define needs_utf16_surrogates(u) (0x100000 > (uint32_t) ((u) - 0x10000))
|
||||
/* High (leading) surrogate for @u: 0xD800 plus the bits of
 * (@u - 0x10000) above the low 10.  The macro itself does not
 * range-check @u. */
#define get_utf16_high_surrogate(u) ((((u) - 0x10000) >> 10) + 0xD800)
|
||||
/* Low (trailing) surrogate for @u: 0xDC00 combined with the low
 * 10 bits of @u.  Subtracting 0x10000 first is unnecessary because
 * that constant has no bits set in the low 10.  The macro itself does
 * not range-check @u. */
#define get_utf16_low_surrogate(u) (0xDC00 | ((u) & 0x3FF))
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user