mirror of
https://github.com/rkd77/elinks.git
synced 2025-01-03 14:57:44 -05:00
HTML bug 1114: Don't doubly decode entities in attributes
The HTML parser decoded SGML entity references and numeric character references in the following attributes, and then the renderer did the same again: link/@title, link/@hreflang, link/@type, link/@media, img/@alt, area/@alt, input[@type="image"]/@alt, input[@type="image"]/@name, input[@type="button"]/@value. The result was that e.g. title="&amp;#65;" displayed as "A" even though it was supposed to display as "&#65;". Fix by making the HTML parser tell the renderer that the entities have already been decoded.
This commit is contained in:
parent
0f49fe1c38
commit
715571a5d6
2
NEWS
2
NEWS
@ -47,6 +47,8 @@ Miscellaneous:
|
|||||||
supported UTF-8 as the dump charset.)
|
supported UTF-8 as the dump charset.)
|
||||||
* Really retry forever when connection.retries = 0.
|
* Really retry forever when connection.retries = 0.
|
||||||
* minor bug 1113: Fix a small memory leak if a mailcap file is malformed.
|
* minor bug 1113: Fix a small memory leak if a mailcap file is malformed.
|
||||||
|
* minor bug 1114: Decode SGML entities and NCRs only once in link/@title
|
||||||
|
and other attributes.
|
||||||
* enhancement: Session-specific options. Any options changed with
|
* enhancement: Session-specific options. Any options changed with
|
||||||
toggle-* actions no longer affect other tabs or other terminals.
|
toggle-* actions no longer affect other tabs or other terminals.
|
||||||
* Do not crash when document.browse.minimum_refresh_time = 0 and
|
* Do not crash when document.browse.minimum_refresh_time = 0 and
|
||||||
|
@ -91,6 +91,9 @@ struct link {
|
|||||||
unsigned char *where;
|
unsigned char *where;
|
||||||
unsigned char *target;
|
unsigned char *target;
|
||||||
unsigned char *where_img;
|
unsigned char *where_img;
|
||||||
|
|
||||||
|
/** The title of the link. This is in the document charset,
|
||||||
|
* and entities have already been decoded. */
|
||||||
unsigned char *title;
|
unsigned char *title;
|
||||||
|
|
||||||
/** The set of characters belonging to this link (their coordinates
|
/** The set of characters belonging to this link (their coordinates
|
||||||
|
@ -14,6 +14,11 @@ enum text_style_format {
|
|||||||
AT_FIXED = 8,
|
AT_FIXED = 8,
|
||||||
AT_GRAPHICS = 16,
|
AT_GRAPHICS = 16,
|
||||||
AT_PREFORMATTED = 32,
|
AT_PREFORMATTED = 32,
|
||||||
|
|
||||||
|
/* AT_NO_ENTITIES means the parser has already expanded
|
||||||
|
* entities and numeric character references, so the put_chars
|
||||||
|
* function of the renderer must not do that again. */
|
||||||
|
AT_NO_ENTITIES = 64,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct text_style_color {
|
struct text_style_color {
|
||||||
|
@ -662,8 +662,10 @@ look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu,
|
|||||||
unsigned char *alt = get_attr_val(attr, "alt", options->cp);
|
unsigned char *alt = get_attr_val(attr, "alt", options->cp);
|
||||||
|
|
||||||
if (alt) {
|
if (alt) {
|
||||||
|
/* CSM_NONE because get_attr_val() already
|
||||||
|
* decoded entities. */
|
||||||
label = convert_string(ct, alt, strlen(alt),
|
label = convert_string(ct, alt, strlen(alt),
|
||||||
options->cp, CSM_DEFAULT,
|
options->cp, CSM_NONE,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
mem_free(alt);
|
mem_free(alt);
|
||||||
} else {
|
} else {
|
||||||
@ -864,7 +866,11 @@ done_html_parser_state(struct html_context *html_context,
|
|||||||
|
|
||||||
/* This function does not set html_context.doc_cp = document.cp,
|
/* This function does not set html_context.doc_cp = document.cp,
|
||||||
* because it does not know the document, and because the codepage has
|
* because it does not know the document, and because the codepage has
|
||||||
* not even been decided when it is called. */
|
* not even been decided when it is called.
|
||||||
|
*
|
||||||
|
* @param[out] title
|
||||||
|
* The title of the document. This is in the document charset,
|
||||||
|
* and entities have not been decoded. */
|
||||||
struct html_context *
|
struct html_context *
|
||||||
init_html_parser(struct uri *uri, struct document_options *options,
|
init_html_parser(struct uri *uri, struct document_options *options,
|
||||||
unsigned char *start, unsigned char *end,
|
unsigned char *start, unsigned char *end,
|
||||||
|
@ -39,7 +39,10 @@ struct text_attrib {
|
|||||||
unsigned char *link;
|
unsigned char *link;
|
||||||
unsigned char *target;
|
unsigned char *target;
|
||||||
unsigned char *image;
|
unsigned char *image;
|
||||||
|
|
||||||
|
/* Any entities in the title have already been decoded. */
|
||||||
unsigned char *title;
|
unsigned char *title;
|
||||||
|
|
||||||
struct form_control *form;
|
struct form_control *form;
|
||||||
|
|
||||||
struct text_attrib_color color;
|
struct text_attrib_color color;
|
||||||
|
@ -232,12 +232,14 @@ html_input_format(struct html_context *html_context, unsigned char *a,
|
|||||||
}
|
}
|
||||||
format.style.attr |= AT_BOLD;
|
format.style.attr |= AT_BOLD;
|
||||||
put_chrs(html_context, "[ ", 7);
|
put_chrs(html_context, "[ ", 7);
|
||||||
|
format.style.attr |= AT_NO_ENTITIES;
|
||||||
if (fc->alt)
|
if (fc->alt)
|
||||||
put_chrs(html_context, fc->alt, strlen(fc->alt));
|
put_chrs(html_context, fc->alt, strlen(fc->alt));
|
||||||
else if (fc->name)
|
else if (fc->name)
|
||||||
put_chrs(html_context, fc->name, strlen(fc->name));
|
put_chrs(html_context, fc->name, strlen(fc->name));
|
||||||
else
|
else
|
||||||
put_chrs(html_context, "Submit", 6);
|
put_chrs(html_context, "Submit", 6);
|
||||||
|
format.style.attr &= ~AT_NO_ENTITIES;
|
||||||
|
|
||||||
put_chrs(html_context, " ]", 7);
|
put_chrs(html_context, " ]", 7);
|
||||||
break;
|
break;
|
||||||
@ -247,8 +249,11 @@ html_input_format(struct html_context *html_context, unsigned char *a,
|
|||||||
case FC_BUTTON:
|
case FC_BUTTON:
|
||||||
format.style.attr |= AT_BOLD;
|
format.style.attr |= AT_BOLD;
|
||||||
put_chrs(html_context, "[ ", 7);
|
put_chrs(html_context, "[ ", 7);
|
||||||
if (fc->default_value)
|
if (fc->default_value) {
|
||||||
|
format.style.attr |= AT_NO_ENTITIES;
|
||||||
put_chrs(html_context, fc->default_value, strlen(fc->default_value));
|
put_chrs(html_context, fc->default_value, strlen(fc->default_value));
|
||||||
|
format.style.attr &= ~AT_NO_ENTITIES;
|
||||||
|
}
|
||||||
put_chrs(html_context, " ]", 7);
|
put_chrs(html_context, " ]", 7);
|
||||||
break;
|
break;
|
||||||
case FC_TEXTAREA:
|
case FC_TEXTAREA:
|
||||||
|
@ -97,7 +97,8 @@ html_superscript(struct html_context *html_context, unsigned char *a,
|
|||||||
put_chrs(html_context, "^", 1);
|
put_chrs(html_context, "^", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: Add more languages. */
|
/* TODO: Add more languages.
|
||||||
|
* Entities can be used in these strings. */
|
||||||
static unsigned char *quote_char[2] = { "\"", "'" };
|
static unsigned char *quote_char[2] = { "\"", "'" };
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -193,17 +193,21 @@ static void
|
|||||||
put_image_label(unsigned char *a, unsigned char *label,
|
put_image_label(unsigned char *a, unsigned char *label,
|
||||||
struct html_context *html_context)
|
struct html_context *html_context)
|
||||||
{
|
{
|
||||||
color_T fg;
|
color_T saved_foreground;
|
||||||
|
enum text_style_format saved_attr;
|
||||||
|
|
||||||
/* This is not 100% appropriate for <img>, but well, accepting
|
/* This is not 100% appropriate for <img>, but well, accepting
|
||||||
* accesskey and tabindex near <img> is just our little
|
* accesskey and tabindex near <img> is just our little
|
||||||
* extension to the standard. After all, it makes sense. */
|
* extension to the standard. After all, it makes sense. */
|
||||||
html_focusable(html_context, a);
|
html_focusable(html_context, a);
|
||||||
|
|
||||||
fg = format.style.color.foreground;
|
saved_foreground = format.style.color.foreground;
|
||||||
|
saved_attr = format.style.attr;
|
||||||
format.style.color.foreground = format.color.image_link;
|
format.style.color.foreground = format.color.image_link;
|
||||||
|
format.style.attr |= AT_NO_ENTITIES;
|
||||||
put_chrs(html_context, label, strlen(label));
|
put_chrs(html_context, label, strlen(label));
|
||||||
format.style.color.foreground = fg;
|
format.style.color.foreground = saved_foreground;
|
||||||
|
format.style.attr = saved_attr;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -354,6 +358,7 @@ html_img(struct html_context *html_context, unsigned char *a,
|
|||||||
html_img_do(a, NULL, html_context);
|
html_img_do(a, NULL, html_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* prefix can have entities in it, but linkname cannot. */
|
||||||
void
|
void
|
||||||
put_link_line(unsigned char *prefix, unsigned char *linkname,
|
put_link_line(unsigned char *prefix, unsigned char *linkname,
|
||||||
unsigned char *link, unsigned char *target,
|
unsigned char *link, unsigned char *target,
|
||||||
@ -370,14 +375,10 @@ put_link_line(unsigned char *prefix, unsigned char *linkname,
|
|||||||
format.link = join_urls(html_context->base_href, link);
|
format.link = join_urls(html_context->base_href, link);
|
||||||
format.target = stracpy(target);
|
format.target = stracpy(target);
|
||||||
format.style.color.foreground = format.color.clink;
|
format.style.color.foreground = format.color.clink;
|
||||||
/* FIXME: linkname typically comes from get_attr_val, which
|
/* linkname typically comes from get_attr_val, which
|
||||||
* has already converted it from the document charset to the
|
* has already expanded character entity references.
|
||||||
* terminal charset and expanded character entity references.
|
* Tell put_chrs not to expand them again. */
|
||||||
* The following put_chrs call again converts the characters
|
format.style.attr |= AT_NO_ENTITIES;
|
||||||
* and expands entity references. So if we have
|
|
||||||
* <meta http-equiv="refresh" content="3; url=foo?&amp;lt" />
|
|
||||||
* then ELinks will display "foo?<" rather than "foo?&lt".
|
|
||||||
* This was mentioned in bug 213. */
|
|
||||||
put_chrs(html_context, linkname, strlen(linkname));
|
put_chrs(html_context, linkname, strlen(linkname));
|
||||||
ln_break(html_context, 1);
|
ln_break(html_context, 1);
|
||||||
pop_html_element(html_context);
|
pop_html_element(html_context);
|
||||||
|
@ -1497,7 +1497,8 @@ put_chars_conv(struct html_context *html_context,
|
|||||||
|
|
||||||
convert_string(renderer_context.convert_table, chars, charslen,
|
convert_string(renderer_context.convert_table, chars, charslen,
|
||||||
html_context->options->cp,
|
html_context->options->cp,
|
||||||
CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
|
(format.style.attr & AT_NO_ENTITIES) ? CSM_NONE : CSM_DEFAULT,
|
||||||
|
NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
@ -2442,6 +2443,8 @@ render_html_document(struct cache_entry *cached, struct document *document,
|
|||||||
html_context->doc_cp = document->cp;
|
html_context->doc_cp = document->cp;
|
||||||
|
|
||||||
if (title.length) {
|
if (title.length) {
|
||||||
|
/* CSM_DEFAULT because init_html_parser() did not
|
||||||
|
* decode entities in the title. */
|
||||||
document->title = convert_string(renderer_context.convert_table,
|
document->title = convert_string(renderer_context.convert_table,
|
||||||
title.source, title.length,
|
title.source, title.length,
|
||||||
document->options.cp,
|
document->options.cp,
|
||||||
|
@ -1424,10 +1424,12 @@ get_current_link_title(struct document_view *doc_view)
|
|||||||
convert_table = get_translation_table(doc_view->document->cp,
|
convert_table = get_translation_table(doc_view->document->cp,
|
||||||
doc_view->document->options.cp);
|
doc_view->document->options.cp);
|
||||||
|
|
||||||
|
/* CSM_NONE because any entities in the title have
|
||||||
|
* already been decoded. */
|
||||||
link_title = convert_string(convert_table, link->title,
|
link_title = convert_string(convert_table, link->title,
|
||||||
strlen(link->title),
|
strlen(link->title),
|
||||||
doc_view->document->options.cp,
|
doc_view->document->options.cp,
|
||||||
CSM_DEFAULT, NULL, NULL, NULL);
|
CSM_NONE, NULL, NULL, NULL);
|
||||||
/* Remove illicit chars. */
|
/* Remove illicit chars. */
|
||||||
#ifdef CONFIG_UTF8
|
#ifdef CONFIG_UTF8
|
||||||
if (link_title && !doc_view->document->options.utf8)
|
if (link_title && !doc_view->document->options.utf8)
|
||||||
|
Loading…
Reference in New Issue
Block a user