Define cp_to_unicode() even without CONFIG_UTF8.

And make its last parameter point to const. add_cp_html_to_string() no longer needs to pretend UTF-8 is ISO-8859-1.
2025-02-02 15:09:23 -05:00 · 2008-10-18 13:51:04 +03:00 · 2008-10-18 13:51:04 +03:00 · 8f4d7f9903
commit 8f4d7f9903
parent ad45176dde
3 changed files with 10 additions and 22 deletions
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -254,7 +254,6 @@ encode_utf8(unicode_val_T u)
 	return utf_buffer;
 }

-#ifdef CONFIG_UTF8
 /* Number of bytes utf8 character indexed by first byte. Illegal bytes are
 * equal ones and handled different. */
 static const char utf8char_len_tab[256] = {
@@ -268,6 +267,7 @@ static const char utf8char_len_tab[256] = {
 	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
 };

+#ifdef CONFIG_UTF8
 inline int utf8charlen(const unsigned char *p)
 {
 	return p ? utf8char_len_tab[*p] : 0;
@@ -630,6 +630,7 @@ unicode_fold_label_case(unicode_val_T c)
 		return c;
 #endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
 }
+#endif /* CONFIG_UTF8 */

 inline unicode_val_T
 utf8_to_unicode(unsigned char **string, const unsigned char *end)
@@ -714,7 +715,6 @@ invalid_utf8:
 	*string = str + length;
 	return u;
 }
-#endif /* CONFIG_UTF8 */

 /* The common part of cp2u and cp2utf_8.  */
 static unicode_val_T
@@ -753,9 +753,8 @@ cp2utf8(int from, int c)
 	return encode_utf8(cp2u_shared(&codepages[from], c));
 }

-#ifdef CONFIG_UTF8
 unicode_val_T
-cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
+cp_to_unicode(int codepage, unsigned char **string, const unsigned char *end)
 {
 	unicode_val_T ret;

@@ -769,7 +768,6 @@ cp_to_unicode(int codepage, unsigned char **string, unsigned char *end)
 	++*string;
 	return ret;
 }
-#endif	/* CONFIG_UTF8 */


 static void
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -144,9 +144,9 @@ unsigned char *utf8_step_backward(unsigned char *, unsigned char *,
 inline int unicode_to_cell(unicode_val_T);
 unicode_val_T unicode_fold_label_case(unicode_val_T);
 inline int strlen_utf8(unsigned char **);
-inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
-unicode_val_T cp_to_unicode(int, unsigned char **, unsigned char *);
 #endif /* CONFIG_UTF8 */
+inline unicode_val_T utf8_to_unicode(unsigned char **, const unsigned char *);
+unicode_val_T cp_to_unicode(int, unsigned char **, const unsigned char *);

 unicode_val_T cp2u(int, unsigned char);
 const unsigned char *cp2utf8(int, int);
--- a/src/util/conv.c
+++ b/src/util/conv.c
@@ -313,21 +313,11 @@ add_cp_html_to_string(struct string *string, int src_codepage,
 	const unsigned char *const end = src + len;
 	unicode_val_T unicode;

-	while (src != end) {
-		if (is_cp_utf8(src_codepage)) {
-#ifdef CONFIG_UTF8
-			unicode = utf8_to_unicode((unsigned char **) &src,
-						  end);
-			if (unicode == UCS_NO_CHAR)
-				break;
-#else  /* !CONFIG_UTF8 */
-			/* Cannot parse UTF-8 without CONFIG_UTF8.
-			 * Pretend the input is ISO-8859-1 instead.  */
-			unicode = *src++;
-#endif /* !CONFIG_UTF8 */
-		} else {
-			unicode = cp2u(src_codepage, *src++);
-		}
+	for (;;) {
+		unicode = cp_to_unicode(src_codepage,
+					(unsigned char **) &src, end);
+		if (unicode == UCS_NO_CHAR)
+			break;

 		if (unicode < 0x20 || unicode >= 0x7F
 		    || unicode == '<' || unicode == '>' || unicode == '&'