mirror of
https://github.com/rkd77/elinks.git
synced 2025-04-18 00:47:36 -04:00
Define and use macros for handling UTF-16 surrogates.
This commit is contained in:
parent
b9d66bd9bd
commit
38fe5b72f7
@ -111,9 +111,9 @@ append_unicode_to_SEE_string(struct SEE_interpreter *interp,
|
|||||||
/* TODO: Should this reject code points in the
|
/* TODO: Should this reject code points in the
|
||||||
* surrogate range? */
|
* surrogate range? */
|
||||||
SEE_string_addch(str, u);
|
SEE_string_addch(str, u);
|
||||||
} else if (u <= 0x10FFFF) {
|
} else if (needs_utf16_surrogates(u)) {
|
||||||
SEE_string_addch(str, 0xD800 + ((u - 0x10000) >> 10));
|
SEE_string_addch(str, get_utf16_high_surrogate(u));
|
||||||
SEE_string_addch(str, 0xDC00 + (u & 0x3FF));
|
SEE_string_addch(str, get_utf16_low_surrogate(u));
|
||||||
} else {
|
} else {
|
||||||
/* str->interpreter exists but is not documented, so don't
|
/* str->interpreter exists but is not documented, so don't
|
||||||
* use it; use a separate @interp parameter instead.
|
* use it; use a separate @interp parameter instead.
|
||||||
@ -131,14 +131,12 @@ SEE_string_to_unicode(struct SEE_interpreter *interp, struct SEE_string *S)
|
|||||||
if (S->length < 1) {
|
if (S->length < 1) {
|
||||||
SEE_error_throw(interp, interp->Error,
|
SEE_error_throw(interp, interp->Error,
|
||||||
"String is empty");
|
"String is empty");
|
||||||
} else if (S->data[0] < 0xD800 || S->data[0] > 0xDFFF) {
|
} else if (!is_utf16_surrogate(S->data[0])) {
|
||||||
return S->data[0];
|
return S->data[0];
|
||||||
} else if (S->length >= 2
|
} else if (S->length >= 2
|
||||||
&& S->data[0] >= 0xD800 && S->data[0] <= 0xDBFF
|
&& is_utf16_high_surrogate(S->data[0])
|
||||||
&& S->data[1] >= 0xDC00 && S->data[1] <= 0xDFFF) {
|
&& is_utf16_low_surrogate(S->data[1])) {
|
||||||
return 0x10000
|
return join_utf16_surrogates(S->data[0], S->data[1]);
|
||||||
+ ((S->data[0] & 0x3FF) << 10)
|
|
||||||
+ (S->data[1] & 0x3FF);
|
|
||||||
} else {
|
} else {
|
||||||
SEE_error_throw(interp, interp->Error,
|
SEE_error_throw(interp, interp->Error,
|
||||||
"Invalid UTF-16 sequence");
|
"Invalid UTF-16 sequence");
|
||||||
|
@ -998,9 +998,9 @@ unicode_to_jsstring(JSContext *ctx, unicode_val_T u)
|
|||||||
* surrogate range? */
|
* surrogate range? */
|
||||||
buf[0] = u;
|
buf[0] = u;
|
||||||
return JS_NewUCStringCopyN(ctx, buf, 1);
|
return JS_NewUCStringCopyN(ctx, buf, 1);
|
||||||
} else if (u <= 0x10FFFF) {
|
} else if (needs_utf16_surrogates(u)) {
|
||||||
buf[0] = 0xD800 + ((u - 0x10000) >> 10);
|
buf[0] = get_utf16_high_surrogate(u);
|
||||||
buf[1] = 0xDC00 + (u & 0x3FF);
|
buf[1] = get_utf16_low_surrogate(u);
|
||||||
return JS_NewUCStringCopyN(ctx, buf, 2);
|
return JS_NewUCStringCopyN(ctx, buf, 2);
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1024,12 +1024,12 @@ jsval_to_accesskey(JSContext *ctx, jsval *vp)
|
|||||||
/* This implementation ignores extra characters in the string. */
|
/* This implementation ignores extra characters in the string. */
|
||||||
if (len < 1)
|
if (len < 1)
|
||||||
return 0; /* which means no access key */
|
return 0; /* which means no access key */
|
||||||
if (chr[0] < 0xD800 || chr[0] > 0xDFFF)
|
if (!is_utf16_surrogate(chr[0]))
|
||||||
return chr[0];
|
return chr[0];
|
||||||
if (len >= 2
|
if (len >= 2
|
||||||
&& chr[0] >= 0xD800 && chr[0] <= 0xDBFF
|
&& is_utf16_high_surrogate(chr[0])
|
||||||
&& chr[1] >= 0xDC00 && chr[1] <= 0xDFFF)
|
&& is_utf16_low_surrogate(chr[1]))
|
||||||
return 0x10000 + ((chr[0] & 0x3FF) << 10) + (chr[1] & 0x3FF);
|
return join_utf16_surrogates(chr[0], chr[1]);
|
||||||
JS_ReportError(ctx, "Invalid UTF-16 sequence");
|
JS_ReportError(ctx, "Invalid UTF-16 sequence");
|
||||||
return UCS_NO_CHAR; /* which the caller will reject */
|
return UCS_NO_CHAR; /* which the caller will reject */
|
||||||
}
|
}
|
||||||
|
@ -88,4 +88,16 @@ unsigned char *u2cp_(unicode_val_T, int, int no_nbsp_hack);
|
|||||||
void init_charsets_lookup(void);
|
void init_charsets_lookup(void);
|
||||||
void free_charsets_lookup(void);
|
void free_charsets_lookup(void);
|
||||||
|
|
||||||
|
/* UTF-16 encodes each Unicode character U+0000...U+FFFF as a single
|
||||||
|
* 16-bit code unit, and each character U+10000...U+10FFFF as a pair
|
||||||
|
* of two code units: a high surrogate followed by a low surrogate.
|
||||||
|
* The range U+D800...U+DFFF is reserved for these surrogates. */
|
||||||
|
/* Nonzero if @code_unit is any UTF-16 surrogate, high or low, i.e. it
 * lies in 0xD800...0xDFFF.  Masking off the low 11 bits leaves 0xD800
 * for exactly that range. */
#define is_utf16_surrogate(code_unit) \
	(0xD800 == (0xFFFFF800 & (code_unit)))
|
||||||
|
/* Nonzero if @code_unit is a high (leading) surrogate, i.e. it lies in
 * 0xD800...0xDBFF.  Masking off the low 10 bits leaves 0xD800 for
 * exactly that range. */
#define is_utf16_high_surrogate(code_unit) \
	(0xD800 == (0xFFFFFC00 & (code_unit)))
|
||||||
|
/* Nonzero if @code_unit is a low (trailing) surrogate, i.e. it lies in
 * 0xDC00...0xDFFF.  Masking off the low 10 bits leaves 0xDC00 for
 * exactly that range. */
#define is_utf16_low_surrogate(code_unit) \
	(0xDC00 == (0xFFFFFC00 & (code_unit)))
|
||||||
|
/* Combine a high surrogate @hi and a low surrogate @lo into the code
 * point they encode: the offset of @hi from 0xD800 supplies the upper
 * ten bits, the offset of @lo from 0xDC00 the lower ten, and 0x10000
 * is added on top.  The arguments are not validated here.  The long
 * constant makes the shifted operand at least as wide as long. */
#define join_utf16_surrogates(hi, lo) \
	(0x10000 \
	 + (((hi) - 0xD800L) << 10) \
	 + ((lo) - 0xDC00))
|
||||||
|
/* Nonzero if the code point @ucs lies in 0x10000...0x10FFFF and so
 * must be written as a surrogate pair in UTF-16.  The uint32_t cast
 * makes values below 0x10000 wrap around to large numbers, so a single
 * comparison checks both ends of the range. */
#define needs_utf16_surrogates(ucs) \
	(0x100000 > (uint32_t) ((ucs) - 0x10000))
|
||||||
|
/* High (leading) surrogate for the code point @ucs: 0xD800 plus the
 * bits above the low ten of (@ucs - 0x10000).  The result is a valid
 * high surrogate only when @ucs is in 0x10000...0x10FFFF; the macro
 * does not check this. */
#define get_utf16_high_surrogate(ucs) \
	((((ucs) - 0x10000) >> 10) + 0xD800)
|
||||||
|
/* Low (trailing) surrogate for the code point @ucs: 0xDC00 plus the
 * low ten bits of @ucs.  Subtracting 0x10000 first is unnecessary
 * because that would not change the low ten bits. */
#define get_utf16_low_surrogate(ucs) \
	(((ucs) & 0x3FF) + 0xDC00)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user