1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-11-04 08:17:17 -05:00

add_cp_html_to_string: New function.

To be used when strings from gettext or strftime must be inserted
into an HTML document with a potentially different charset.
This commit is contained in:
Kalle Olavi Niemitalo 2007-03-19 22:09:49 +02:00 committed by Witold Filipczyk
parent 269a8f4397
commit 605d04378f
2 changed files with 50 additions and 1 deletions

View File

@ -288,7 +288,6 @@ add_html_to_string(struct string *string, const unsigned char *src, int len)
string->source[rollback_length] = '\0';
return NULL;
}
} else {
if (!add_char_to_string(string, *src))
return NULL;
@ -298,6 +297,50 @@ add_html_to_string(struct string *string, const unsigned char *src, int len)
return string;
}
/* Append @len bytes of @src, interpreted in codepage @src_codepage, to
 * @string as HTML-safe ASCII.  Every character is first mapped to a
 * Unicode value; values in 0x20...0x7E other than < > & " ' are copied
 * verbatim, everything else is written as a numeric reference "&#N;".
 *
 * Returns @string on success.  Returns NULL as soon as one of the append
 * helpers fails; a half-written "&#N;" is rolled back first, but
 * characters appended by earlier iterations stay in @string.
 *
 * With CONFIG_UTF8, decoding of UTF-8 input stops quietly (and @string
 * is still returned) when utf8_to_unicode() reports UCS_NO_CHAR. */
struct string *
add_cp_html_to_string(struct string *string, int src_codepage,
		      const unsigned char *src, int len)
{
	const unsigned char *const limit = src + len;

	while (src != limit) {
		unicode_val_T ch;
		int saved_length;

		/* Step 1: decode one character from the input. */
		if (!is_cp_utf8(src_codepage)) {
			/* Single-byte codepage: translate through the table. */
			ch = cp2u(src_codepage, *src++);
		} else {
#ifdef CONFIG_UTF8
			ch = utf8_to_unicode((unsigned char **) &src, limit);
			if (ch == UCS_NO_CHAR)
				break;
#else /* !CONFIG_UTF8 */
			/* No UTF-8 decoder was compiled in, so each byte is
			 * treated as if it were ISO-8859-1. */
			ch = *src++;
#endif /* !CONFIG_UTF8 */
		}

		/* Step 2: printable ASCII that has no special meaning in
		 * HTML is emitted unchanged. */
		if (ch >= 0x20 && ch < 0x7F
		    && ch != '<' && ch != '>' && ch != '&'
		    && ch != '\"' && ch != '\'') {
			if (!add_char_to_string(string, ch))
				return NULL;
			continue;
		}

		/* Step 3: anything else becomes a numeric character
		 * reference.  Remember the length so that a partially
		 * written reference can be undone. */
		saved_length = string->length;

		if (add_bytes_to_string(string, "&#", 2)
		    && add_long_to_string(string, ch)
		    && add_char_to_string(string, ';'))
			continue;

		string->length = saved_length;
		string->source[saved_length] = '\0';
		return NULL;
	}

	return string;
}
/* TODO Optimize later --pasky */
struct string *
add_quoted_to_string(struct string *string, const unsigned char *src, int len)

View File

@ -86,6 +86,12 @@ add_string_replace(struct string *string, unsigned char *src, int len,
* know the charset of the input data.) */
struct string *add_html_to_string(struct string *string, const unsigned char *html, int htmllen);
/* Append @htmllen bytes of @html, which are encoded in the codepage
 * @src_codepage, to @string.  Reserved characters (< > & " '), control
 * characters below 0x20, and everything at or above 0x7F are converted
 * to numeric html references of the form &#N;.  The resulting
 * string can be correctly parsed in any charset where bytes
 * 0x20...0x7E match ASCII.
 *
 * Returns @string, or NULL if appending to @string fails.  When built
 * with CONFIG_UTF8, conversion of UTF-8 input stops at the first
 * sequence that cannot be decoded and @string is still returned. */
struct string *add_cp_html_to_string(struct string *string, int src_codepage,
const unsigned char *html, int htmllen);
/* Escapes \ and " with a \
 * NOTE(review): the definition is not visible from this hunk; presumably
 * the escaped copy of the @qlen bytes at @q is appended to @string and
 * @string (or NULL on failure) is returned, like the sibling add_*
 * functions -- confirm against the implementation. */
struct string *add_quoted_to_string(struct string *string, const unsigned char *q, int qlen);