mirror of
https://github.com/rkd77/elinks.git
synced 2025-06-30 22:19:29 -04:00
[utf] Debug REPLACEMENT_CHARACTER. Refs #249
This commit is contained in:
parent
4ef70a1cfa
commit
5cd66e06ba
@ -748,66 +748,88 @@ utf8_to_unicode(char **string, const char *end)
|
||||
if (str[0] >= 0x80) {
|
||||
invalid_utf8:
|
||||
++*string;
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
return UCS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
u = str[0];
|
||||
break;
|
||||
case 2: /* U+0080 to U+07FF */
|
||||
if ((str[1] & 0xc0) != 0x80)
|
||||
if ((str[1] & 0xc0) != 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
u = (str[0] & 0x1f) << 6;
|
||||
u += (str[1] & 0x3f);
|
||||
if (u < 0x80)
|
||||
if (u < 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
break;
|
||||
case 3: /* U+0800 to U+FFFF, except surrogates */
|
||||
if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80)
|
||||
if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
u = (str[0] & 0x0f) << 12;
|
||||
u += ((str[1] & 0x3f) << 6);
|
||||
u += (str[2] & 0x3f);
|
||||
if (u < 0x800 || is_utf16_surrogate(u))
|
||||
if (u < 0x800 || is_utf16_surrogate(u)) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
break;
|
||||
case 4: /* U+10000 to U+1FFFFF */
|
||||
if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
|
||||
|| (str[3] & 0xc0) != 0x80)
|
||||
|| (str[3] & 0xc0) != 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
u = (str[0] & 0x0f) << 18;
|
||||
u += ((str[1] & 0x3f) << 12);
|
||||
u += ((str[2] & 0x3f) << 6);
|
||||
u += (str[3] & 0x3f);
|
||||
if (u < 0x10000)
|
||||
if (u < 0x10000) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
break;
|
||||
case 5: /* U+200000 to U+3FFFFFF */
|
||||
if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
|
||||
|| (str[3] & 0xc0) != 0x80 || (str[4] & 0xc0) != 0x80)
|
||||
|| (str[3] & 0xc0) != 0x80 || (str[4] & 0xc0) != 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
u = (str[0] & 0x0f) << 24;
|
||||
u += ((str[1] & 0x3f) << 18);
|
||||
u += ((str[2] & 0x3f) << 12);
|
||||
u += ((str[3] & 0x3f) << 6);
|
||||
u += (str[4] & 0x3f);
|
||||
if (u < 0x200000)
|
||||
if (u < 0x200000) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
break;
|
||||
case 6: /* U+4000000 to U+7FFFFFFF */
|
||||
if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
|
||||
|| (str[3] & 0xc0) != 0x80 || (str[4] & 0xc0) != 0x80
|
||||
|| (str[5] & 0xc0) != 0x80)
|
||||
|| (str[5] & 0xc0) != 0x80) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
u = (str[0] & 0x01) << 30;
|
||||
u += ((str[1] & 0x3f) << 24);
|
||||
u += ((str[2] & 0x3f) << 18);
|
||||
u += ((str[3] & 0x3f) << 12);
|
||||
u += ((str[4] & 0x3f) << 6);
|
||||
u += (str[5] & 0x3f);
|
||||
if (u < 0x4000000)
|
||||
if (u < 0x4000000) {
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
INTERNAL("utf8char_len_tab out of range");
|
||||
fprintf(stderr, "goto %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
goto invalid_utf8;
|
||||
}
|
||||
*string = (char *)(str + length);
|
||||
@ -820,7 +842,10 @@ cp2u_shared(const struct codepage_desc *from, unsigned char c)
|
||||
{
|
||||
unicode_val_T u = from->highhalf[c - 0x80];
|
||||
|
||||
if (u == 0xFFFF) u = UCS_REPLACEMENT_CHARACTER;
|
||||
if (u == 0xFFFF) {
|
||||
u = UCS_REPLACEMENT_CHARACTER;
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
}
|
||||
return u;
|
||||
}
|
||||
|
||||
@ -833,7 +858,10 @@ cp2u(int from, unsigned char c)
|
||||
/* UTF-8 is a multibyte codepage and cannot be handled with
|
||||
* this function. */
|
||||
assert(!is_cp_ptr_utf8(&codepages[from]));
|
||||
if_assert_failed return UCS_REPLACEMENT_CHARACTER;
|
||||
if_assert_failed {
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
return UCS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
|
||||
if (c < 0x80) return c;
|
||||
else return cp2u_shared(&codepages[from], c);
|
||||
|
@ -26,8 +26,11 @@
|
||||
#include <js/SourceText.h>
|
||||
#include <js/Warnings.h>
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define SMJS_HOOKS_FILENAME "hooks.js"
|
||||
|
||||
|
||||
JSContext *smjs_ctx;
|
||||
JSObject *smjs_elinks_object;
|
||||
struct session *smjs_ses;
|
||||
@ -236,6 +239,7 @@ add_jschars_to_utf8_string(struct string *utf8,
|
||||
unicode = join_utf16_surrogates(unicode,
|
||||
utf16[pos++]);
|
||||
} else {
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
unicode = UCS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "viewer/text/textarea.h"
|
||||
#include "viewer/timer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/** Information used for communication between ELinks instances */
|
||||
struct terminal_interlink {
|
||||
@ -362,8 +363,10 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
|
||||
/* UTF-8 allows neither overlong
|
||||
* sequences nor surrogates. */
|
||||
if (u < interlink->utf8.min
|
||||
|| is_utf16_surrogate(u))
|
||||
|| is_utf16_surrogate(u)) {
|
||||
u = UCS_REPLACEMENT_CHARACTER;
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
}
|
||||
term_send_ucs(term, u,
|
||||
term->interlink->utf8.modifier);
|
||||
}
|
||||
@ -377,6 +380,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
|
||||
* let this byte be handled below. */
|
||||
|
||||
interlink->utf8.len = 0;
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
term_send_ucs(term, UCS_REPLACEMENT_CHARACTER,
|
||||
term->interlink->utf8.modifier);
|
||||
}
|
||||
@ -447,6 +451,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
|
||||
}
|
||||
|
||||
invalid_utf8_start_byte:
|
||||
fprintf(stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__);
|
||||
term_send_ucs(term, UCS_REPLACEMENT_CHARACTER, modifier);
|
||||
break;
|
||||
}
|
||||
|
1
test/chars.txt
Normal file
1
test/chars.txt
Normal file
@ -0,0 +1 @@
|
||||
U+00C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
|
Loading…
x
Reference in New Issue
Block a user