mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Define cp_to_unicode() even without CONFIG_UTF8.
Also make its last parameter a pointer to const. As a result, add_cp_html_to_string() no longer needs to pretend UTF-8 is ISO-8859-1.
This commit is contained in:
parent
ad45176dde
commit
8f4d7f9903
@ -254,7 +254,6 @@ encode_utf8(unicode_val_T u)
|
|||||||
return utf_buffer;
|
return utf_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_UTF8
|
|
||||||
/* Number of bytes in a UTF-8 character, indexed by its first byte. Illegal bytes are
|
/* Number of bytes in a UTF-8 character, indexed by its first byte. Illegal bytes are
|
||||||
* given a length of one and are handled differently. */
|
* given a length of one and are handled differently. */
|
||||||
static const char utf8char_len_tab[256] = {
|
static const char utf8char_len_tab[256] = {
|
||||||
@ -268,6 +267,7 @@ static const char utf8char_len_tab[256] = {
|
|||||||
3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
|
3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_UTF8
|
||||||
inline int utf8charlen(const unsigned char *p)
|
inline int utf8charlen(const unsigned char *p)
|
||||||
{
|
{
|
||||||
return p ? utf8char_len_tab[*p] : 0;
|
return p ? utf8char_len_tab[*p] : 0;
|
||||||
@ -630,6 +630,7 @@ unicode_fold_label_case(unicode_val_T c)
|
|||||||
return c;
|
return c;
|
||||||
#endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
|
#endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
|
||||||
}
|
}
|
||||||
|
#endif /* CONFIG_UTF8 */
|
||||||
|
|
||||||
inline unicode_val_T
|
inline unicode_val_T
|
||||||
utf8_to_unicode(unsigned char **string, const unsigned char *end)
|
utf8_to_unicode(unsigned char **string, const unsigned char *end)
|
||||||
@ -714,7 +715,6 @@ invalid_utf8:
|
|||||||
*string = str + length;
|
*string = str + length;
|
||||||
return u;
|
return u;
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_UTF8 */
|
|
||||||
|
|
||||||
/* The common part of cp2u and cp2utf8. */
|
/* The common part of cp2u and cp2utf8. */
|
||||||
static unicode_val_T
|
static unicode_val_T
|
||||||
@ -753,9 +753,8 @@ cp2utf8(int from, int c)
|
|||||||
return encode_utf8(cp2u_shared(&codepages[from], c));
|
return encode_utf8(cp2u_shared(&codepages[from], c));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_UTF8
|
|
||||||
unicode_val_T
|
unicode_val_T
|
||||||
cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
|
cp_to_unicode(int codepage, unsigned char **string, const unsigned char *end)
|
||||||
{
|
{
|
||||||
unicode_val_T ret;
|
unicode_val_T ret;
|
||||||
|
|
||||||
@ -769,7 +768,6 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
|
|||||||
++*string;
|
++*string;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_UTF8 */
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -144,9 +144,9 @@ unsigned char *utf8_step_backward(unsigned char *, unsigned char *,
|
|||||||
inline int unicode_to_cell(unicode_val_T);
|
inline int unicode_to_cell(unicode_val_T);
|
||||||
unicode_val_T unicode_fold_label_case(unicode_val_T);
|
unicode_val_T unicode_fold_label_case(unicode_val_T);
|
||||||
inline int strlen_utf8(unsigned char **);
|
inline int strlen_utf8(unsigned char **);
|
||||||
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
|
|
||||||
unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
|
|
||||||
#endif /* CONFIG_UTF8 */
|
#endif /* CONFIG_UTF8 */
|
||||||
|
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
|
||||||
|
unicode_val_T cp_to_unicode(int, unsigned char **, const unsigned char *);
|
||||||
|
|
||||||
unicode_val_T cp2u(int, unsigned char);
|
unicode_val_T cp2u(int, unsigned char);
|
||||||
const unsigned char *cp2utf8(int, int);
|
const unsigned char *cp2utf8(int, int);
|
||||||
|
@ -313,21 +313,11 @@ add_cp_html_to_string(struct string *string, int src_codepage,
|
|||||||
const unsigned char *const end = src + len;
|
const unsigned char *const end = src + len;
|
||||||
unicode_val_T unicode;
|
unicode_val_T unicode;
|
||||||
|
|
||||||
while (src != end) {
|
for (;;) {
|
||||||
if (is_cp_utf8(src_codepage)) {
|
unicode = cp_to_unicode(src_codepage,
|
||||||
#ifdef CONFIG_UTF8
|
(unsigned char **) &src, end);
|
||||||
unicode = utf8_to_unicode((unsigned char **) &src,
|
|
||||||
end);
|
|
||||||
if (unicode == UCS_NO_CHAR)
|
if (unicode == UCS_NO_CHAR)
|
||||||
break;
|
break;
|
||||||
#else /* !CONFIG_UTF8 */
|
|
||||||
/* Cannot parse UTF-8 without CONFIG_UTF8.
|
|
||||||
* Pretend the input is ISO-8859-1 instead. */
|
|
||||||
unicode = *src++;
|
|
||||||
#endif /* !CONFIG_UTF8 */
|
|
||||||
} else {
|
|
||||||
unicode = cp2u(src_codepage, *src++);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unicode < 0x20 || unicode >= 0x7F
|
if (unicode < 0x20 || unicode >= 0x7F
|
||||||
|| unicode == '<' || unicode == '>' || unicode == '&'
|
|| unicode == '<' || unicode == '>' || unicode == '&'
|
||||||
|
Loading…
Reference in New Issue
Block a user