diff --git a/NEWS b/NEWS
index be3b7db3..e0f89868 100644
--- a/NEWS
+++ b/NEWS
@@ -47,6 +47,8 @@ Miscellaneous:
supported UTF-8 as the dump charset.)
* Really retry forever when connection.retries = 0.
* minor bug 1113: Fix a small memory leak if a mailcap file is malformed.
+* minor bug 1114: Decode SGML entities and NCRs only once in link/@title
+ and other attributes.
* enhancement: Session-specific options. Any options changed with
toggle-* actions no longer affect other tabs or other terminals.
* Do not crash when document.browse.minimum_refresh_time = 0 and
diff --git a/src/document/document.h b/src/document/document.h
index f6e2461b..0a24280a 100644
--- a/src/document/document.h
+++ b/src/document/document.h
@@ -91,6 +91,9 @@ struct link {
unsigned char *where;
unsigned char *target;
unsigned char *where_img;
+
+ /** The title of the link. This is in the document charset,
+ * and entities have already been decoded. */
unsigned char *title;
/** The set of characters belonging to this link (their coordinates
diff --git a/src/document/format.h b/src/document/format.h
index fee79dcf..defd2ada 100644
--- a/src/document/format.h
+++ b/src/document/format.h
@@ -14,6 +14,11 @@ enum text_style_format {
AT_FIXED = 8,
AT_GRAPHICS = 16,
AT_PREFORMATTED = 32,
+
+ /* AT_NO_ENTITIES means the parser has already expanded
+ * entities and numeric character references, so the put_chars
+ * function of the renderer must not do that again. */
+ AT_NO_ENTITIES = 64,
};
struct text_style_color {
diff --git a/src/document/html/parser.c b/src/document/html/parser.c
index 7ebc582f..4b89b842 100644
--- a/src/document/html/parser.c
+++ b/src/document/html/parser.c
@@ -662,8 +662,10 @@ look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu,
unsigned char *alt = get_attr_val(attr, "alt", options->cp);
if (alt) {
+ /* CSM_NONE because get_attr_val() already
+ * decoded entities. */
label = convert_string(ct, alt, strlen(alt),
- options->cp, CSM_DEFAULT,
+ options->cp, CSM_NONE,
NULL, NULL, NULL);
mem_free(alt);
} else {
@@ -864,7 +866,11 @@ done_html_parser_state(struct html_context *html_context,
/* This function does not set html_context.doc_cp = document.cp,
* because it does not know the document, and because the codepage has
- * not even been decided when it is called. */
+ * not even been decided when it is called.
+ *
+ * @param[out] title
+ * The title of the document. This is in the document charset,
+ * and entities have not been decoded. */
struct html_context *
init_html_parser(struct uri *uri, struct document_options *options,
unsigned char *start, unsigned char *end,
diff --git a/src/document/html/parser.h b/src/document/html/parser.h
index 8f99befc..f989f7a0 100644
--- a/src/document/html/parser.h
+++ b/src/document/html/parser.h
@@ -39,7 +39,10 @@ struct text_attrib {
unsigned char *link;
unsigned char *target;
unsigned char *image;
+
+ /* Any entities in the title have already been decoded. */
unsigned char *title;
+
struct form_control *form;
struct text_attrib_color color;
diff --git a/src/document/html/parser/forms.c b/src/document/html/parser/forms.c
index 5f9dc475..d40a569a 100644
--- a/src/document/html/parser/forms.c
+++ b/src/document/html/parser/forms.c
@@ -232,12 +232,14 @@ html_input_format(struct html_context *html_context, unsigned char *a,
}
format.style.attr |= AT_BOLD;
put_chrs(html_context, "[ ", 7);
+ format.style.attr |= AT_NO_ENTITIES;
if (fc->alt)
put_chrs(html_context, fc->alt, strlen(fc->alt));
else if (fc->name)
put_chrs(html_context, fc->name, strlen(fc->name));
else
put_chrs(html_context, "Submit", 6);
+ format.style.attr &= ~AT_NO_ENTITIES;
put_chrs(html_context, " ]", 7);
break;
@@ -247,8 +249,11 @@ html_input_format(struct html_context *html_context, unsigned char *a,
case FC_BUTTON:
format.style.attr |= AT_BOLD;
put_chrs(html_context, "[ ", 7);
- if (fc->default_value)
+ if (fc->default_value) {
+ format.style.attr |= AT_NO_ENTITIES;
put_chrs(html_context, fc->default_value, strlen(fc->default_value));
+ format.style.attr &= ~AT_NO_ENTITIES;
+ }
put_chrs(html_context, " ]", 7);
break;
case FC_TEXTAREA:
diff --git a/src/document/html/parser/general.c b/src/document/html/parser/general.c
index 5f811832..7e8de820 100644
--- a/src/document/html/parser/general.c
+++ b/src/document/html/parser/general.c
@@ -97,7 +97,8 @@ html_superscript(struct html_context *html_context, unsigned char *a,
put_chrs(html_context, "^", 1);
}
-/* TODO: Add more languages. */
+/* TODO: Add more languages.
+ * Entities can be used in these strings. */
static unsigned char *quote_char[2] = { "\"", "'" };
void
diff --git a/src/document/html/parser/link.c b/src/document/html/parser/link.c
index 6a6435ef..02e68cc3 100644
--- a/src/document/html/parser/link.c
+++ b/src/document/html/parser/link.c
@@ -193,17 +193,21 @@ static void
put_image_label(unsigned char *a, unsigned char *label,
struct html_context *html_context)
{
- color_T fg;
+ color_T saved_foreground;
+ enum text_style_format saved_attr;
/* This is not 100% appropriate for , but well, accepting
* accesskey and tabindex near is just our little
* extension to the standard. After all, it makes sense. */
html_focusable(html_context, a);
- fg = format.style.color.foreground;
+ saved_foreground = format.style.color.foreground;
+ saved_attr = format.style.attr;
format.style.color.foreground = format.color.image_link;
+ format.style.attr |= AT_NO_ENTITIES;
put_chrs(html_context, label, strlen(label));
- format.style.color.foreground = fg;
+ format.style.color.foreground = saved_foreground;
+ format.style.attr = saved_attr;
}
static void
@@ -354,6 +358,7 @@ html_img(struct html_context *html_context, unsigned char *a,
html_img_do(a, NULL, html_context);
}
+/* prefix can have entities in it, but linkname cannot. */
void
put_link_line(unsigned char *prefix, unsigned char *linkname,
unsigned char *link, unsigned char *target,
@@ -370,14 +375,10 @@ put_link_line(unsigned char *prefix, unsigned char *linkname,
format.link = join_urls(html_context->base_href, link);
format.target = stracpy(target);
format.style.color.foreground = format.color.clink;
- /* FIXME: linkname typically comes from get_attr_val, which
- * has already converted it from the document charset to the
- * terminal charset and expanded character entity references.
- * The following put_chrs call again converts the characters
- * and expands entity references. So if we have
- *
- * then ELinks will display "foo?<" rather than "foo?<".
- * This was mentioned in bug 213. */
+ /* linkname typically comes from get_attr_val, which
+ * has already expanded character entity references.
+ * Tell put_chrs not to expand them again. */
+ format.style.attr |= AT_NO_ENTITIES;
put_chrs(html_context, linkname, strlen(linkname));
ln_break(html_context, 1);
pop_html_element(html_context);
diff --git a/src/document/html/renderer.c b/src/document/html/renderer.c
index ec6c3125..30376919 100644
--- a/src/document/html/renderer.c
+++ b/src/document/html/renderer.c
@@ -1497,7 +1497,8 @@ put_chars_conv(struct html_context *html_context,
convert_string(renderer_context.convert_table, chars, charslen,
html_context->options->cp,
- CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
+ (format.style.attr & AT_NO_ENTITIES) ? CSM_NONE : CSM_DEFAULT,
+ NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
}
static inline void
@@ -2442,6 +2443,8 @@ render_html_document(struct cache_entry *cached, struct document *document,
html_context->doc_cp = document->cp;
if (title.length) {
+ /* CSM_DEFAULT because init_html_parser() did not
+ * decode entities in the title. */
document->title = convert_string(renderer_context.convert_table,
title.source, title.length,
document->options.cp,
diff --git a/src/viewer/text/link.c b/src/viewer/text/link.c
index a73643bd..86535a55 100644
--- a/src/viewer/text/link.c
+++ b/src/viewer/text/link.c
@@ -1424,10 +1424,12 @@ get_current_link_title(struct document_view *doc_view)
convert_table = get_translation_table(doc_view->document->cp,
doc_view->document->options.cp);
+ /* CSM_NONE because any entities in the title have
+ * already been decoded. */
link_title = convert_string(convert_table, link->title,
strlen(link->title),
doc_view->document->options.cp,
- CSM_DEFAULT, NULL, NULL, NULL);
+ CSM_NONE, NULL, NULL, NULL);
/* Remove illicit chars. */
#ifdef CONFIG_UTF8
if (link_title && !doc_view->document->options.utf8)