1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

Define cp_to_unicode() even without CONFIG_UTF8.

Also make its last parameter a pointer to const.  As a result,
add_cp_html_to_string() no longer needs to pretend UTF-8 is ISO-8859-1.
This commit is contained in:
Kalle Olavi Niemitalo 2008-10-18 13:51:04 +03:00 committed by Kalle Olavi Niemitalo
parent ad45176dde
commit 8f4d7f9903
3 changed files with 10 additions and 22 deletions

View File

@ -254,7 +254,6 @@ encode_utf8(unicode_val_T u)
return utf_buffer; return utf_buffer;
} }
#ifdef CONFIG_UTF8
/* Number of bytes utf8 character indexed by first byte. Illegal bytes are /* Number of bytes utf8 character indexed by first byte. Illegal bytes are
* equal ones and handled different. */ * equal ones and handled different. */
static const char utf8char_len_tab[256] = { static const char utf8char_len_tab[256] = {
@ -268,6 +267,7 @@ static const char utf8char_len_tab[256] = {
3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
}; };
#ifdef CONFIG_UTF8
inline int utf8charlen(const unsigned char *p) inline int utf8charlen(const unsigned char *p)
{ {
return p ? utf8char_len_tab[*p] : 0; return p ? utf8char_len_tab[*p] : 0;
@ -630,6 +630,7 @@ unicode_fold_label_case(unicode_val_T c)
return c; return c;
#endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */ #endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
} }
#endif /* CONFIG_UTF8 */
inline unicode_val_T inline unicode_val_T
utf8_to_unicode(unsigned char **string, const unsigned char *end) utf8_to_unicode(unsigned char **string, const unsigned char *end)
@ -714,7 +715,6 @@ invalid_utf8:
*string = str + length; *string = str + length;
return u; return u;
} }
#endif /* CONFIG_UTF8 */
/* The common part of cp2u and cp2utf_8. */ /* The common part of cp2u and cp2utf_8. */
static unicode_val_T static unicode_val_T
@ -753,9 +753,8 @@ cp2utf8(int from, int c)
return encode_utf8(cp2u_shared(&codepages[from], c)); return encode_utf8(cp2u_shared(&codepages[from], c));
} }
#ifdef CONFIG_UTF8
unicode_val_T unicode_val_T
cp_to_unicode(int codepage, unsigned char **string, unsigned char *end) cp_to_unicode(int codepage, unsigned char **string, const unsigned char *end)
{ {
unicode_val_T ret; unicode_val_T ret;
@ -769,7 +768,6 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
++*string; ++*string;
return ret; return ret;
} }
#endif /* CONFIG_UTF8 */
static void static void

View File

@ -144,9 +144,9 @@ unsigned char *utf8_step_backward(unsigned char *, unsigned char *,
inline int unicode_to_cell(unicode_val_T); inline int unicode_to_cell(unicode_val_T);
unicode_val_T unicode_fold_label_case(unicode_val_T); unicode_val_T unicode_fold_label_case(unicode_val_T);
inline int strlen_utf8(unsigned char **); inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
#endif /* CONFIG_UTF8 */ #endif /* CONFIG_UTF8 */
inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
unicode_val_T cp_to_unicode(int, unsigned char **, const unsigned char *);
unicode_val_T cp2u(int, unsigned char); unicode_val_T cp2u(int, unsigned char);
const unsigned char *cp2utf8(int, int); const unsigned char *cp2utf8(int, int);

View File

@ -313,21 +313,11 @@ add_cp_html_to_string(struct string *string, int src_codepage,
const unsigned char *const end = src + len; const unsigned char *const end = src + len;
unicode_val_T unicode; unicode_val_T unicode;
while (src != end) { for (;;) {
if (is_cp_utf8(src_codepage)) { unicode = cp_to_unicode(src_codepage,
#ifdef CONFIG_UTF8 (unsigned char **) &src, end);
unicode = utf8_to_unicode((unsigned char **) &src, if (unicode == UCS_NO_CHAR)
end); break;
if (unicode == UCS_NO_CHAR)
break;
#else /* !CONFIG_UTF8 */
/* Cannot parse UTF-8 without CONFIG_UTF8.
* Pretend the input is ISO-8859-1 instead. */
unicode = *src++;
#endif /* !CONFIG_UTF8 */
} else {
unicode = cp2u(src_codepage, *src++);
}
if (unicode < 0x20 || unicode >= 0x7F if (unicode < 0x20 || unicode >= 0x7F
|| unicode == '<' || unicode == '>' || unicode == '&' || unicode == '<' || unicode == '>' || unicode == '&'