1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-02-02 15:09:23 -05:00
Witold Filipczyk fd70a5b352 get_attr_value: do not do trim_chars
trim_chars was called only in debug mode and the results of the get_attr_val
for value=" something " in debug mode differ from normal and fastmem mode.

[ From commit e347122e6dff75dcb7fc1dc877c77ad16ed13c07 in ELinks
  0.12.GIT.  --KON ]
2007-03-18 13:50:34 +02:00

1069 lines
31 KiB
C

/* HTML core parser routines */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "document/css/apply.h"
#include "document/css/parser.h"
#include "document/html/parser/forms.h"
#include "document/html/parser/general.h"
#include "document/html/parser/link.h"
#include "document/html/parser/parse.h"
#include "document/html/parser/stack.h"
#include "document/html/parser.h"
#include "document/options.h"
#include "intl/charsets.h"
#include "util/conv.h"
#include "util/error.h"
#include "util/fastfind.h"
#include "util/memdebug.h"
#include "util/memory.h"
#include "util/string.h"
/* Unsafe macros */
#include "document/html/internal.h"
/* True for the two characters that terminate the inside of a tag. */
#define end_of_tag(c) ((c) == '>' || (c) == '<')

/* Tell whether @c may be part of an attribute name: a 7-bit character
 * below 127 that is not a control character, not a space, not '=' and
 * not one of the end_of_tag() characters. Returns 1 or 0. */
static inline int
atchr(register unsigned char c)
{
	if (c >= 127)
		return 0;

	/* Everything above '>' in the remaining range qualifies. */
	if (c > '>')
		return 1;

	/* Control characters and the space never do. */
	if (c <= ' ')
		return 0;

	return c != '=' && !end_of_tag(c);
}
/* This function eats one html element. */
/* - e is pointer to the beginning of the element (*e must be '<')
* - eof is pointer to the end of scanned area
* - parsed element name is stored in name, its length is namelen
* - first attribute is stored in attr
* - end points to first character behind the html element */
/* It returns -1 when it failed (returned values in pointers are invalid) and
* 0 for success. */
/* name, attr and end may each be NULL when the caller does not need that
* value; namelen is only written when name is non-NULL as well. For a closing
* tag the stored name starts at the '/' and namelen counts it. */
int
parse_element(register unsigned char *e, unsigned char *eof,
unsigned char **name, int *namelen,
unsigned char **attr, unsigned char **end)
{
/* Step to the next byte; reaching eof anywhere inside the element fails the
* whole parse. */
#define next_char() if (++e == eof) return -1;
assert(e && eof);
if (e >= eof || *e != '<') return -1;
next_char();
if (name) *name = e;
if (*e == '/') next_char();
/* The element name has to start with an identifier character. */
if (!isident(*e)) return -1;
while (isident(*e)) next_char();
/* Only whitespace, a tag end, '/', ':' or '=' may follow the name. */
if (!isspace(*e) && !end_of_tag(*e) && *e != '/' && *e != ':' && *e != '=')
return -1;
if (name && namelen) *namelen = e - *name;
while (isspace(*e) || *e == '/' || *e == ':') next_char();
/* Skip bad attribute */
while (!atchr(*e) && !end_of_tag(*e) && !isspace(*e)) next_char();
if (attr) *attr = e;
/* From here on the attributes are only walked over in order to find the end
* of the tag; nothing about them is stored. */
next_attr:
while (isspace(*e)) next_char();
/* Skip bad attribute */
while (!atchr(*e) && !end_of_tag(*e) && !isspace(*e)) next_char();
if (end_of_tag(*e)) goto end;
/* Attribute name. */
while (atchr(*e)) next_char();
while (isspace(*e)) next_char();
if (*e != '=') {
/* Attribute without a value. */
if (end_of_tag(*e)) goto end;
goto next_attr;
}
next_char();
while (isspace(*e)) next_char();
if (isquote(*e)) {
unsigned char quote = *e;
/* quoted_value: */
next_char();
/* An unterminated quote runs into eof and so fails the parse. */
while (*e != quote) next_char();
next_char();
/* The following apparently handles the case of <foo
* id="a""b">, however that is very rare and probably not
* conforming. More frequent (and mishandling it more fatal) is
* probably the typo of <foo id="a""> - we can handle it as
* long as this is commented out. --pasky */
/* if (*e == quote) goto quoted_value; */
} else {
/* Unquoted value: runs up to whitespace or a tag end. */
while (!isspace(*e) && !end_of_tag(*e)) next_char();
}
while (isspace(*e)) next_char();
if (!end_of_tag(*e)) goto next_attr;
end:
/* A closing '>' is consumed; a '<' that started the next tag is not. */
if (end) *end = e + (*e == '>');
return 0;
}
/* Grow the byte buffer *@x, currently holding @l bytes, so that one more
 * byte fits. Evaluates to NULL when the allocation failed. */
#define realloc_chrs(x, l) mem_align_alloc(x, l, (l) + 1, unsigned char, 0xFF)

/* Append byte @c to buffer @s of length @l, growing it when needed.
 * On allocation failure control goes to the parse_error label of
 * get_attr_value(), which releases what was collected so far; a plain
 * `return NULL' here would leak that buffer. mem_align_alloc() is assumed
 * to leave the old buffer valid on failure, as realloc() does. */
#define add_chr(s, l, c)						\
	do {								\
		if (!realloc_chrs(&(s), l)) goto parse_error;		\
		(s)[(l)++] = (c);					\
	} while (0)

/* Look up attribute @name in the attribute list starting at @e.
 *
 * - @e points at the attributes of a tag; scanning stops at the first
 *   '<', '>' or other character that cannot start an attribute name.
 * - @name is compared case-insensitively.
 * - @options supplies the codepage (options->cp) handed to
 *   convert_string() when the value contains '&'.
 * - @flags: with HTML_ATTR_TEST only the presence of the attribute is
 *   checked; with HTML_ATTR_EAT_NL tabs and line feeds inside a quoted
 *   value are dropped instead of being turned into a space.
 *
 * Returns NULL when the attribute is missing, the tag is malformed or
 * memory ran out. With HTML_ATTR_TEST the result points into @e at the
 * start of the attribute name and must not be freed. Otherwise it is a
 * newly allocated NUL-terminated copy of the value (empty for an
 * attribute without a value) which the caller releases with mem_free(). */
unsigned char *
get_attr_value(register unsigned char *e, unsigned char *name,
	       struct document_options *options, enum html_attr_flags flags)
{
	unsigned char *n;
	unsigned char *name_start;
	unsigned char *attr = NULL;
	int attrlen = 0;
	int found;

next_attr:
	skip_space(e);
	if (end_of_tag(*e) || !atchr(*e)) goto parse_error;
	n = name;
	name_start = e;

	/* Match as much of @name as possible, ignoring case. */
	while (atchr(*n) && atchr(*e) && toupper(*e) == toupper(*n)) e++, n++;
	/* It is a hit only when both names ended at the same point. */
	found = !*n && !atchr(*e);

	if (found && (flags & HTML_ATTR_TEST)) return name_start;

	/* Skip the rest of a longer, non-matching attribute name. */
	while (atchr(*e)) e++;
	skip_space(e);
	if (*e != '=') {
		/* Attribute without a value. */
		if (found) goto found_endattr;
		goto next_attr;
	}
	e++;
	skip_space(e);

	if (found) {
		if (!isquote(*e)) {
			/* Unquoted value: copy up to whitespace or tag end. */
			while (!isspace(*e) && !end_of_tag(*e)) {
				if (!*e) goto parse_error;
				add_chr(attr, attrlen, *e);
				e++;
			}
		} else {
			unsigned char quote = *e;

			/* parse_quoted_value: */
			while (*(++e) != quote) {
				/* Carriage returns are always dropped. */
				if (*e == ASCII_CR) continue;
				if (!*e) goto parse_error;
				if (*e != ASCII_TAB && *e != ASCII_LF)
					add_chr(attr, attrlen, *e);
				else if (!(flags & HTML_ATTR_EAT_NL))
					add_chr(attr, attrlen, ' ');
			}
			e++;
			/* The following apparently handles the case of <foo
			 * id="a""b">, however that is very rare and probably
			 * not conforming. More frequent (and mishandling it
			 * more fatal) is probably the typo of <foo id="a""> -
			 * we can handle it as long as this is commented out.
			 * --pasky */
#if 0
			if (*e == quote) {
				add_chr(attr, attrlen, *e);
				goto parse_quoted_value;
			}
#endif
		}

found_endattr:
		/* Terminate the value; the NUL is not counted in attrlen. */
		add_chr(attr, attrlen, '\0');
		attrlen--;

		if (/* Unused: !(flags & HTML_ATTR_NO_CONV) && */
		    memchr(attr, '&', attrlen)) {
			unsigned char *saved_attr = attr;

			attr = convert_string(NULL, saved_attr, attrlen,
					      options->cp, CSM_QUERY,
					      NULL, NULL, NULL);
			mem_free(saved_attr);
		}

		set_mem_comment(attr, name, strlen(name));
		return attr;

	} else {
		/* Not the attribute we want: just step over its value. */
		if (!isquote(*e)) {
			while (!isspace(*e) && !end_of_tag(*e)) {
				if (!*e) goto parse_error;
				e++;
			}
		} else {
			unsigned char quote = *e;

			do {
				while (*(++e) != quote)
					if (!*e) goto parse_error;
				e++;
			} while (/* See above. *e == quote */ 0);
		}
	}

	goto next_attr;

parse_error:
	mem_free_if(attr);
	return NULL;
}
#undef add_chr
/* Read attribute @name of the tag whose attributes start at @a as a
 * decimal number. The whole attribute value has to be a number in the
 * range 0..INT_MAX; anything else, including a missing or empty
 * attribute, yields -1. */
int
get_num(unsigned char *a, unsigned char *name, struct document_options *options)
{
	unsigned char *value = get_attr_val(a, name, options);
	unsigned char *tail;
	long parsed;
	int valid;

	if (!value)
		return -1;

	errno = 0;
	parsed = strtol(value, (char **) &tail, 10);

	/* @tail points into @value, so judge the result before freeing. */
	valid = !errno && *value && !*tail && parsed >= 0 && parsed <= INT_MAX;
	mem_free(value);

	return valid ? (int) parsed : -1;
}
/* Parse 'width[%],....'-like attribute @name of element @a. If @limited is
 * set, it will limit the width value to the current usable width. Note that
 * @limited must be set to be able to parse percentage widths. */
/* The function returns width in characters or -1 in case of error.
 * Only the part before the first ',' is looked at. Negative numbers are
 * accepted by the conversion and end up as a width of 0. */
int
get_width(unsigned char *a, unsigned char *name, int limited,
	  struct html_context *html_context)
{
	unsigned char *value = get_attr_val(a, name, html_context->options);
	unsigned char *str = value;
	unsigned char *end;
	int percentage = 0;
	int len;
	long width;

	if (!value) return -1;

	/* Skip spaces at start of string if any. */
	skip_space(str);

	/* Search for end of string or ',' character (ie. in "100,200") */
	for (len = 0; str[len] && str[len] != ','; len++);

	/* Go back, and skip spaces after width if any. */
	while (len && isspace(str[len - 1])) len--;
	if (!len) { mem_free(value); return -1; } /* Nothing to parse. */

	/* Is this a percentage ? */
	if (str[len - 1] == '%') len--, percentage = 1;

	/* Skip spaces between width number and percentage if any. */
	while (len && isspace(str[len - 1])) len--;
	if (!len) { mem_free(value); return -1; } /* Nothing to parse. */

	/* Shorten the string a bit, so strtoul() will work on useful
	 * part of it. */
	str[len] = '\0';

	/* Convert to number if possible. */
	errno = 0;
	width = strtoul((char *) str, (char **) &end, 10);

	/* @end points into the @value string so check @end position
	 * before freeing @value. */
	if (errno || *end || width >= INT_MAX) {
		/* Not a valid number. */
		mem_free(value);
		return -1;
	}

	mem_free(value);

	/* A negative width always ends up as 0 below; settle that now so
	 * the arithmetic only ever sees non-negative values. */
	if (width < 0)
		width = 0;

/* Fully parenthesized and without a trailing ';' so that it behaves like
 * an ordinary expression wherever it is used. */
#define WIDTH_PIXELS2CHARS(width) \
	(((width) + (HTML_CHAR_WIDTH - 1) / 2) / HTML_CHAR_WIDTH)

	if (limited) {
		int maxwidth = get_html_max_width();

		if (percentage) {
			/* Value is a percentage. Anything from 100% up is
			 * capped at @maxwidth below anyway; capping it
			 * here first keeps the product from overflowing
			 * where long is only 32 bits wide. */
			if (width > 100)
				width = 100;
			width = width * maxwidth / 100;
		} else {
			/* Value is a number of pixels, makes an approximation. */
			width = WIDTH_PIXELS2CHARS(width);
		}

		if (width > maxwidth)
			width = maxwidth;

	} else {
		if (percentage) {
			/* No sense, we need @limited and @maxwidth for percentage. */
			return -1;
		} else {
			/* Value is a number of pixels, makes an approximation,
			 * no limit here */
			width = WIDTH_PIXELS2CHARS(width);
		}
	}

#undef WIDTH_PIXELS2CHARS

	if (width < 0)
		width = 0;

	return width;
}
/* Skip a "<!...>" or "<?...>" construct starting at @html.
 *
 * The caller guarantees that at least two bytes ("<!" or "<?") are
 * available before @eof. If the construct opens with "<!--" it is a
 * comment and ends at the first "--", followed by any number of further
 * '-' and whitespace characters, followed by '>'. Anything else ends at
 * the first '>'.
 *
 * Returns the position just behind the closing '>', or @eof when the
 * construct is not terminated. */
unsigned char *
skip_comment(unsigned char *html, unsigned char *eof)
{
	unsigned char *pos;

	if (html + 4 > eof || html[2] != '-' || html[3] != '-') {
		/* Declaration or processing instruction. */
		for (pos = html + 2; pos < eof; pos++) {
			if (*pos == '>')
				return pos + 1;
		}
		return eof;
	}

	pos = html + 4;
	while (pos < eof) {
		if (pos + 2 > eof || pos[0] != '-' || pos[1] != '-') {
			pos++;
			continue;
		}

		/* Got "--"; see whether it really closes the comment. */
		pos += 2;
		while (pos < eof && *pos == '-') pos++;
		while (pos < eof && isspace(*pos)) pos++;
		if (pos >= eof)
			break;
		if (*pos == '>')
			return pos + 1;
		/* It did not; carry on searching from here. */
	}

	return eof;
}
/* How an element interacts with the html element stack; consulted by
* start_element() and end_element(). */
enum element_type {
/* No special treatment: start_element() gives it a stack item through
* html_stack_dup() and end_element() looks that item up again. */
ELEMENT_TYPE_NESTABLE,
/* Before the element is opened start_element() searches the stack and may
* kill an item with the same name together with the items before it. */
ELEMENT_TYPE_NON_NESTABLE,
/* Gets no stack item in start_element(); its closing tag is ignored by
* end_element(). */
ELEMENT_TYPE_NON_PAIRABLE,
/* Handled like ELEMENT_TYPE_NON_NESTABLE on opening, but with its own stack
* search loop; its closing tag is ignored by end_element(). */
ELEMENT_TYPE_LI,
};
/* Static description of one HTML element; see the elements[] table. */
struct element_info {
/* Element name, uppercase. */
unsigned char *name;
/* Element handler. This does the relevant arguments processing and
* formatting (by calling renderer hooks). Note that in a few cases,
* this is just a placeholder and the element is given special care
* in start_element() (which is also where we call these handlers). */
element_handler_T *func;
/* How many line-breaks to ensure we have before and after an element.
* Value of 1 means the element will be on a line on its own, value
* of 2 means that it will also have empty lines before and after.
* Note that this does not add up - it just ensures that there is
* at least so many linebreaks, but does not add more if that is the
* case. Therefore, something like e.g. </pre></p> will add only two
* linebreaks, not four. */
/* In some stack killing logic, we use some weird heuristic based on
* whether an element is block or inline. That is determined from
* whether this attribute is zero or non-zero. */
int linebreak;
/* Stack behaviour of the element when it is opened and closed. */
enum element_type type;
};
/* Table of all HTML elements known to the parser, terminated by an entry
* with a NULL name. Keep the entries sorted by name: without USE_FASTFIND
* process_element() searches the table with bsearch() and a case-insensitive
* name comparison. */
static struct element_info elements[] = {
{"A", html_a, 0, ELEMENT_TYPE_NON_NESTABLE},
{"ABBR", html_italic, 0, ELEMENT_TYPE_NESTABLE },
{"ADDRESS", html_address, 2, ELEMENT_TYPE_NESTABLE },
{"APPLET", html_applet, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"B", html_bold, 0, ELEMENT_TYPE_NESTABLE },
{"BASE", html_base, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"BASEFONT", html_font, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"BLOCKQUOTE", html_blockquote, 2, ELEMENT_TYPE_NESTABLE },
{"BODY", html_body, 0, ELEMENT_TYPE_NESTABLE },
{"BR", html_br, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"BUTTON", html_button, 0, ELEMENT_TYPE_NESTABLE },
{"CAPTION", html_center, 1, ELEMENT_TYPE_NESTABLE },
{"CENTER", html_center, 1, ELEMENT_TYPE_NESTABLE },
{"CODE", html_fixed, 0, ELEMENT_TYPE_NESTABLE },
{"DD", html_dd, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"DFN", html_bold, 0, ELEMENT_TYPE_NESTABLE },
{"DIR", html_ul, 2, ELEMENT_TYPE_NESTABLE },
{"DIV", html_linebrk, 1, ELEMENT_TYPE_NESTABLE },
{"DL", html_dl, 2, ELEMENT_TYPE_NESTABLE },
{"DT", html_dt, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"EM", html_italic, 0, ELEMENT_TYPE_NESTABLE },
{"EMBED", html_embed, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"FIXED", html_fixed, 0, ELEMENT_TYPE_NESTABLE },
{"FONT", html_font, 0, ELEMENT_TYPE_NESTABLE },
{"FORM", html_form, 1, ELEMENT_TYPE_NESTABLE },
{"FRAME", html_frame, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"FRAMESET", html_frameset, 1, ELEMENT_TYPE_NESTABLE },
{"H1", html_h1, 2, ELEMENT_TYPE_NON_NESTABLE},
{"H2", html_h2, 2, ELEMENT_TYPE_NON_NESTABLE},
{"H3", html_h3, 2, ELEMENT_TYPE_NON_NESTABLE},
{"H4", html_h4, 2, ELEMENT_TYPE_NON_NESTABLE},
{"H5", html_h5, 2, ELEMENT_TYPE_NON_NESTABLE},
{"H6", html_h6, 2, ELEMENT_TYPE_NON_NESTABLE},
{"HEAD", html_head, 0, ELEMENT_TYPE_NESTABLE },
{"HR", html_hr, 2, ELEMENT_TYPE_NON_PAIRABLE},
{"HTML", html_html, 0, ELEMENT_TYPE_NESTABLE },
{"I", html_italic, 0, ELEMENT_TYPE_NESTABLE },
{"IFRAME", html_iframe, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"IMG", html_img, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"INPUT", html_input, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"LI", html_li, 1, ELEMENT_TYPE_LI },
{"LINK", html_link, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"LISTING", html_pre, 2, ELEMENT_TYPE_NESTABLE },
{"MENU", html_ul, 2, ELEMENT_TYPE_NESTABLE },
{"META", html_meta, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"NOFRAMES", html_noframes, 0, ELEMENT_TYPE_NESTABLE },
{"NOSCRIPT", html_noscript, 0, ELEMENT_TYPE_NESTABLE },
{"OBJECT", html_object, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"OL", html_ol, 2, ELEMENT_TYPE_NESTABLE },
{"OPTION", html_option, 1, ELEMENT_TYPE_NON_PAIRABLE},
{"P", html_p, 2, ELEMENT_TYPE_NON_NESTABLE},
{"PRE", html_pre, 2, ELEMENT_TYPE_NESTABLE },
{"Q", html_italic, 0, ELEMENT_TYPE_NESTABLE },
{"S", html_underline, 0, ELEMENT_TYPE_NESTABLE },
{"SCRIPT", html_script, 0, ELEMENT_TYPE_NESTABLE },
{"SELECT", html_select, 0, ELEMENT_TYPE_NESTABLE },
{"SPAN", html_span, 0, ELEMENT_TYPE_NESTABLE },
{"STRIKE", html_underline, 0, ELEMENT_TYPE_NESTABLE },
{"STRONG", html_bold, 0, ELEMENT_TYPE_NESTABLE },
{"STYLE", html_style, 0, ELEMENT_TYPE_NESTABLE },
{"SUB", html_subscript, 0, ELEMENT_TYPE_NESTABLE },
{"SUP", html_superscript, 0, ELEMENT_TYPE_NESTABLE },
{"TABLE", html_table, 2, ELEMENT_TYPE_NESTABLE },
{"TD", html_td, 0, ELEMENT_TYPE_NESTABLE },
{"TEXTAREA", html_textarea, 0, ELEMENT_TYPE_NON_PAIRABLE},
{"TH", html_th, 0, ELEMENT_TYPE_NESTABLE },
{"TITLE", html_title, 0, ELEMENT_TYPE_NESTABLE },
{"TR", html_tr, 1, ELEMENT_TYPE_NESTABLE },
{"TT", html_tt, 0, ELEMENT_TYPE_NON_NESTABLE},
{"U", html_underline, 0, ELEMENT_TYPE_NESTABLE },
{"UL", html_ul, 2, ELEMENT_TYPE_NESTABLE },
{"XMP", html_xmp, 2, ELEMENT_TYPE_NESTABLE },
{NULL, NULL, 0, ELEMENT_TYPE_NESTABLE },
};
/* Number of real entries, i.e. not counting the terminating NULL entry. */
#define NUMBER_OF_TAGS (sizeof_array(elements) - 1)
#ifndef USE_FASTFIND
static int
compar(const void *a, const void *b)
{
return strcasecmp(((struct element_info *) a)->name,
((struct element_info *) b)->name);
}
#else
/* Cursor into elements[] shared by the two fastfind callbacks below. */
static struct element_info *internal_pointer;

/* Fastfind callback: rewind the cursor to the first table entry. */
static void
tags_list_reset(void)
{
	internal_pointer = &elements[0];
}
/* Returns a pointer to a struct that contains
* current key and data pointers and increment
* internal pointer.
* It returns NULL when key is NULL. */
static struct fastfind_key_value *
tags_list_next(void)
{
static struct fastfind_key_value kv;
if (!internal_pointer->name) return NULL;
kv.key = internal_pointer->name;
kv.data = internal_pointer;
internal_pointer++;
return &kv;
}
/* Fastfind index over the elements table; it is filled through the
* tags_list_reset() and tags_list_next() callbacks and queried by
* process_element(). */
static struct fastfind_index ff_tags_index
= INIT_FASTFIND_INDEX("tags_lookup", tags_list_reset, tags_list_next);
#endif /* USE_FASTFIND */
/* Prepare the element name lookup. With USE_FASTFIND this builds the
* fastfind index; otherwise there is nothing to do. */
void
init_tags_lookup(void)
{
#ifdef USE_FASTFIND
fastfind_index(&ff_tags_index, FF_COMPRESS);
#endif
}
/* Counterpart of init_tags_lookup(). With USE_FASTFIND this hands the index
* to fastfind_done(); otherwise there is nothing to do. */
void
free_tags_lookup(void)
{
#ifdef USE_FASTFIND
fastfind_done(&ff_tags_index);
#endif
}
/* Forward declaration: parse_html() calls process_element(), which is
* defined further down. */
static unsigned char *process_element(unsigned char *name, int namelen, int endingtag,
unsigned char *html, unsigned char *prev_html,
unsigned char *eof, unsigned char *attr,
struct html_context *html_context);
/* Parse the HTML text between @html and @eof.
* Runs of text are passed on through put_chrs(), line breaks through
* ln_break() and every recognized tag through process_element().
* - @part is stored in html_context->part on entry, before each fresh run of
* text and again on exit.
* - @head, when non-NULL, is handed to process_head() before parsing starts.
* The spacing and line break state kept in @html_context is reset on entry
* and partly again on exit. */
void
parse_html(unsigned char *html, unsigned char *eof,
struct part *part, unsigned char *head,
struct html_context *html_context)
{
/* Start of the text that was scanned but not yet passed to put_chrs(). */
unsigned char *base_pos = html;
/* Set when the current run of text simply continues, so that the next
* iteration must not move base_pos. */
int noupdate = 0;
html_context->putsp = HTML_SPACE_SUPPRESS;
html_context->line_breax = html_context->table_level ? 2 : 1;
html_context->position = 0;
html_context->was_br = 0;
html_context->was_li = 0;
html_context->was_body = 0;
/* html_context->was_body_background = 0; */
html_context->part = part;
html_context->eoff = eof;
if (head) process_head(html_context, head);
main_loop:
while (html < eof) {
unsigned char *name, *attr, *end;
int namelen, endingtag;
int dotcounter = 0;
if (!noupdate) {
html_context->part = part;
html_context->eoff = eof;
base_pos = html;
} else {
noupdate = 0;
}
/* Whitespace outside of preformatted text: runs of it collapse into a
* single space. */
if (isspace(*html) && !html_is_preformatted()) {
unsigned char *h = html;
while (h < eof && isspace(*h))
h++;
/* Whitespace directly in front of a closing tag is not emitted here;
* HTML_SPACE_ADD only records that a space is pending. */
if (h + 1 < eof && h[0] == '<' && h[1] == '/') {
if (!parse_element(h, eof, &name, &namelen, &attr, &end)) {
put_chrs(html_context, base_pos, html - base_pos);
base_pos = html = h;
html_context->putsp = HTML_SPACE_ADD;
goto element;
}
}
html++;
if (!(html_context->position + (html - base_pos - 1)))
goto skip_w; /* ??? */
if (*(html - 1) == ' ') { /* Do not replace with isspace() ! --Zas */
/* BIG performance win; not sure if it doesn't cause any bug */
if (html < eof && !isspace(*html)) {
noupdate = 1;
continue;
}
put_chrs(html_context, base_pos, html - base_pos);
} else {
/* Any other whitespace character is replaced by a real space. */
put_chrs(html_context, base_pos, html - base_pos - 1);
put_chrs(html_context, " ", 1);
}
skip_w:
while (html < eof && isspace(*html))
html++;
continue;
}
if (html_is_preformatted()) {
html_context->putsp = HTML_SPACE_NORMAL;
if (*html == ASCII_TAB) {
/* Expand the tab to the next multiple of eight columns. */
put_chrs(html_context, base_pos, html - base_pos);
put_chrs(html_context, " ",
8 - (html_context->position % 8));
html++;
continue;
} else if (*html == ASCII_CR || *html == ASCII_LF) {
put_chrs(html_context, base_pos, html - base_pos);
if (html - base_pos == 0 && html_context->line_breax > 0)
html_context->line_breax--;
next_break:
/* A CR LF pair counts as a single line break. */
if (*html == ASCII_CR && html < eof - 1
&& html[1] == ASCII_LF)
html++;
ln_break(html_context, 1);
html++;
/* NOTE(review): html may be equal to eof at this point, so this looks like
* a read of one byte behind the scanned area - confirm the buffer is
* terminated. */
if (*html == ASCII_CR || *html == ASCII_LF) {
html_context->line_breax = 0;
goto next_break;
}
continue;
} else if (html + 5 < eof && *html == '&') {
/* Really nasty hack to make &#13; handling in
* <pre>-tags lynx-compatible. It works around
* the entity handling done in the renderer,
* since checking #13 value there would require
* something along the lines of NBSP_CHAR or
* checking for '\n's in AT_PREFORMATTED text. */
/* See bug 52 and 387 for more info. */
int length = html - base_pos;
int newlines = 0;
while ((html + 5 < eof && html[0] == '&' && html[1] == '#')
&& (!memcmp(html + 2, "13;", 3)
|| (html + 6 < eof && !strncasecmp(html + 2, "x0a;", 4)))) {
newlines++;
/* "&#13;" is five bytes long, "&#x0a;" is six. */
html += 5 + (html[4] != ';');
}
if (newlines) {
put_chrs(html_context, base_pos, length);
ln_break(html_context, newlines);
continue;
}
}
}
/* Each remaining character below the space is shown as a dot. */
while (*html < ' ') {
if (html - base_pos)
put_chrs(html_context, base_pos, html - base_pos);
dotcounter++;
base_pos = ++html;
/* NOTE(review): *html is read before the html >= eof test, which looks
* like a possible read of one byte behind the scanned area - confirm. */
if (*html >= ' ' || isspace(*html) || html >= eof) {
unsigned char *dots = fmem_alloc(dotcounter);
if (dots) {
memset(dots, '.', dotcounter);
put_chrs(html_context, dots, dotcounter);
fmem_free(dots);
}
goto main_loop;
}
}
/* Comments, declarations and processing instructions are skipped, except
* inside XMP and STYLE where they are ordinary content. */
if (html + 2 <= eof && html[0] == '<' && (html[1] == '!' || html[1] == '?')
&& !(html_context->was_xmp || html_context->was_style)) {
put_chrs(html_context, base_pos, html - base_pos);
html = skip_comment(html, eof);
continue;
}
/* Plain text, or a '<' that does not start a well-formed element: keep
* collecting. */
if (*html != '<' || parse_element(html, eof, &name, &namelen, &attr, &end)) {
html++;
noupdate = 1;
continue;
}
element:
/* parse_element() leaves the '/' of a closing tag in the name. */
endingtag = *name == '/'; name += endingtag; namelen -= endingtag;
if (!endingtag && html_context->putsp == HTML_SPACE_ADD && !html_top.invisible)
put_chrs(html_context, " ", 1);
put_chrs(html_context, base_pos, html - base_pos);
if (!html_is_preformatted() && !endingtag && html_context->putsp == HTML_SPACE_NORMAL) {
unsigned char *ee = end;
unsigned char *nm;
/* Look behind the opening tags that follow immediately: emit a space when
* whitespace comes after them, unless a closing tag is met first. */
while (!parse_element(ee, eof, &nm, NULL, NULL, &ee))
if (*nm == '/')
goto ng;
if (ee < eof && isspace(*ee)) {
put_chrs(html_context, " ", 1);
}
ng:;
}
html = process_element(name, namelen, endingtag, end, html, eof, attr, html_context);
}
/* Flush the run of text that was still being collected. */
if (noupdate) put_chrs(html_context, base_pos, html - base_pos);
ln_break(html_context, 1);
/* Restore the part in case the html_context was trashed in the last
* iteration so that when destroying the stack in the caller we still
* get the right part pointer. */
html_context->part = part;
html_context->putsp = HTML_SPACE_SUPPRESS;
html_context->position = 0;
html_context->was_br = 0;
}
/* Handle the opening tag of element @ei.
* - @name and @namelen give the element name as found in the document.
* - @html points behind the tag, @eof at the end of the scanned area.
* - @attr points at the attributes of the tag.
* Returns the position at which parsing should continue; this is @html unless
* the element handler moved it through its last argument. */
static unsigned char *
start_element(struct element_info *ei,
unsigned char *name, int namelen,
unsigned char *html,
unsigned char *eof, unsigned char *attr,
struct html_context *html_context)
{
/* Emit the line breaks wanted by the element and report its "id" attribute,
* if it has one, through special_f() as SP_TAG. */
#define ELEMENT_RENDER_PROLOGUE \
ln_break(html_context, ei->linebreak); \
a = get_attr_val(attr, "id", html_context->options); \
if (a) { \
html_context->special_f(html_context, SP_TAG, a); \
mem_free(a); \
}
unsigned char *a;
struct par_attrib old_format;
int restore_format;
#ifdef CONFIG_CSS
struct css_selector *selector = NULL;
#endif
if (html_top.type == ELEMENT_WEAK) {
kill_html_stack_item(html_context, &html_top);
}
/* We try to process nested <script> if we didn't process the parent
* one. */
if (html_top.invisible
&& (ei->func != html_script || html_top.invisible < 2)) {
ELEMENT_RENDER_PROLOGUE
return html;
}
restore_format = html_is_preformatted();
old_format = par_format;
/* Support for <meta refresh="..."> inside <body>. (bug 700) */
if (ei->func == html_meta && html_context->was_body) {
/* name - 1 is handed over as the start of the tag. */
html_handle_body_meta(html_context, name - 1, eof);
html_context->was_body = 0;
}
#ifdef CONFIG_CSS
if (ei->func == html_style && html_context->options->css_enable) {
css_parse_stylesheet(&html_context->css_styles,
html_context->base_href, html, eof);
}
#endif
/* Elements that cannot nest implicitly close an open element of the same
* name. The loops below choose the stack item where the search stops. */
if (ei->type == ELEMENT_TYPE_NON_NESTABLE
|| ei->type == ELEMENT_TYPE_LI) {
struct html_element *e;
if (ei->type == ELEMENT_TYPE_NON_NESTABLE) {
foreach (e, html_context->stack) {
if (e->type < ELEMENT_KILLABLE) break;
if (is_block_element(e) || is_inline_element(ei)) break;
}
} else foreach (e, html_context->stack) {
if (is_block_element(e) && is_inline_element(ei)) break;
if (e->type < ELEMENT_KILLABLE) break;
if (!strlcasecmp(e->name, e->namelen, name, namelen)) break;
}
/* Only when the search stopped at an item of the same name: kill the
* items before it and, unless it is immortal, the item itself. */
if (!strlcasecmp(e->name, e->namelen, name, namelen)) {
while (e->prev != (void *) &html_context->stack)
kill_html_stack_item(html_context, e->prev);
if (e->type > ELEMENT_IMMORTAL)
kill_html_stack_item(html_context, e);
}
}
/* Every element that can have a closing tag gets its own stack item. */
if (ei->type != ELEMENT_TYPE_NON_PAIRABLE) {
html_stack_dup(html_context, ELEMENT_KILLABLE);
html_top.name = name;
html_top.namelen = namelen;
html_top.options = attr;
html_top.linebreak = ei->linebreak;
#ifdef CONFIG_ECMASCRIPT
if (has_attr(attr, "onClick", html_context->options)) {
/* XXX: Put something better to format.link. --pasky */
mem_free_set(&format.link, stracpy("javascript:void(0);"));
mem_free_set(&format.target, stracpy(html_context->base_target));
format.style.fg = format.clink;
html_top.pseudo_class = ELEMENT_LINK;
mem_free_set(&format.title, stracpy("onClick placeholder"));
/* Er. I know. Well, double html_focusable()s shouldn't
* really hurt. */
html_focusable(html_context, attr);
}
#endif
}
#ifdef CONFIG_CSS
if (html_top.options && html_context->options->css_enable) {
/* XXX: We should apply CSS otherwise as well, but that'll need
* some deeper changes in order to have options filled etc.
* Probably just applying CSS from more places, since we
* usually have type != ELEMENT_TYPE_NESTABLE when we either (1)
* rescan on your own from somewhere else (2) html_stack_dup()
* in our own way. --pasky */
/* Call it now to gain some of the stuff which might affect
* formatting of some elements. */
/* FIXME: The caching of the CSS selector is broken, since it can
* lead to wrong styles being applied to following elements, so
* disabled for now. */
selector = get_css_selector_for_element(html_context, &html_top,
&html_context->css_styles,
&html_context->stack);
if (selector) {
apply_css_selector_style(html_context, &html_top, selector);
done_css_selector(selector);
}
}
/* Now this was the reason for this whole funny ELEMENT_RENDER_PROLOGUE
* business. Only now we have the definitive linebreak value, since
* that's what the display: property plays with. */
#endif
ELEMENT_RENDER_PROLOGUE
if (ei->func) ei->func(html_context, attr, html, eof, &html);
#ifdef CONFIG_CSS
/* NOTE(review): selector was already handed to done_css_selector() above;
* here it is only tested for being non-NULL before it is fetched again. */
if (selector && html_top.options) {
/* Call it now to override default colors of the elements. */
selector = get_css_selector_for_element(html_context, &html_top,
&html_context->css_styles,
&html_context->stack);
if (selector) {
apply_css_selector_style(html_context, &html_top, selector);
done_css_selector(selector);
}
}
#endif
if (ei->func != html_br) html_context->was_br = 0;
/* When we started out in preformatted text, restore the paragraph format
* saved on entry. */
if (restore_format) par_format = old_format;
return html;
#undef ELEMENT_RENDER_PROLOGUE
}
/* Handle the closing tag of element @ei named @name (@namelen bytes).
* Closing XMP or STYLE clears the matching was_xmp / was_style flag. Closing
* tags of non-pairable elements and of LI are otherwise ignored. For all
* other elements the stack is searched for the matching opening element,
* which is then killed, normally together with the items before it in the
* stack list.
* Always returns @html unchanged; @eof and @attr are not used. */
static unsigned char *
end_element(struct element_info *ei,
unsigned char *name, int namelen,
unsigned char *html,
unsigned char *eof, unsigned char *attr,
struct html_context *html_context)
{
struct html_element *e, *elt;
/* Largest linebreak value among the elements that get closed. */
int lnb = 0;
/* Set once a block element was passed while closing an inline element. */
int kill = 0;
if (ei->func == html_xmp) html_context->was_xmp = 0;
if (ei->func == html_style) html_context->was_style = 0;
html_context->was_br = 0;
if (ei->type == ELEMENT_TYPE_NON_PAIRABLE
|| ei->type == ELEMENT_TYPE_LI)
return html;
/* Apply background color from the <HTML> element. (bug 696) */
if (ei->func == html_html
&& html_top.type >= ELEMENT_KILLABLE
&& !html_context->was_body_background)
html_apply_canvas_bgcolor(html_context);
/* dump_html_stack(html_context); */
foreach (e, html_context->stack) {
if (is_block_element(e) && is_inline_element(ei)) kill = 1;
/* Not the element we look for: give up at the first item that may not be
* killed, otherwise keep searching. */
if (strlcasecmp(e->name, e->namelen, name, namelen)) {
if (e->type < ELEMENT_KILLABLE)
break;
else
continue;
}
/* An inline element closed across a block element: only the matching
* item itself is removed, without any line break. */
if (kill) {
kill_html_stack_item(html_context, e);
break;
}
for (elt = e;
elt != (void *) &html_context->stack;
elt = elt->prev)
if (elt->linebreak > lnb)
lnb = elt->linebreak;
/* This hack forces a line break after a list end. It is needed
* when ending a list with the last <li> having no text the
* line_breax is 2 so the ending list's linebreak will be
* ignored when calling ln_break(). */
if (html_context->was_li)
html_context->line_breax = 0;
ln_break(html_context, lnb);
/* Kill everything in front of the matching item, then the item itself. */
while (e->prev != (void *) &html_context->stack)
kill_html_stack_item(html_context, e->prev);
kill_html_stack_item(html_context, e);
break;
}
/* dump_html_stack(html_context); */
return html;
}
static unsigned char *
process_element(unsigned char *name, int namelen, int endingtag,
unsigned char *html, unsigned char *prev_html,
unsigned char *eof, unsigned char *attr,
struct html_context *html_context)
{
struct element_info *ei;
#ifndef USE_FASTFIND
{
struct element_info elem;
unsigned char tmp;
tmp = name[namelen];
name[namelen] = '\0';
elem.name = name;
ei = bsearch(&elem, elements, NUMBER_OF_TAGS, sizeof(elem), compar);
name[namelen] = tmp;
}
#else
ei = (struct element_info *) fastfind_search(&ff_tags_index, name, namelen);
#endif
if (html_context->was_xmp || html_context->was_style) {
if (!ei || (ei->func != html_xmp && ei->func != html_style) || !endingtag) {
put_chrs(html_context, "<", 1);
return prev_html + 1;
}
}
if (!ei) return html;
if (!endingtag) {
return start_element(ei, name, namelen, html, eof, attr, html_context);
} else {
return end_element(ei, name, namelen, html, eof, attr, html_context);
}
}
/* Scan the document between @s and @eof for META elements and the TITLE.
* - @head receives a leading newline, then for every META element with an
* http-equiv attribute a line "<http-equiv>: <content>" ended by CRLF, and
* for every META element with a charset attribute the text
* "Charset: <value>".
* - @title, when non-NULL, is initialized here and receives the text of the
* first non-empty TITLE element.
* - @options is passed on to get_attr_val().
* Scanning stops at </HEAD>, at <BODY> or at @eof. */
void
scan_http_equiv(unsigned char *s, unsigned char *eof, struct string *head,
struct string *title, struct document_options *options)
{
unsigned char *name, *attr, *he, *c;
int namelen;
if (title && !init_string(title)) return;
add_char_to_string(head, '\n');
/* se: search for the next element. sp: step over one byte first; it is
* entered from below when a '<' does not start a parsable element. */
se:
while (s < eof && *s != '<') {
sp:
s++;
}
if (s >= eof) return;
if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {
s = skip_comment(s, eof);
goto se;
}
if (parse_element(s, eof, &name, &namelen, &attr, &s)) goto sp;
/* ps: an element was parsed; for a closing tag name still starts with '/'. */
ps:
if (!strlcasecmp(name, namelen, "HEAD", 4)) goto se;
if (!strlcasecmp(name, namelen, "/HEAD", 5)) return;
if (!strlcasecmp(name, namelen, "BODY", 4)) return;
if (title && !title->length && !strlcasecmp(name, namelen, "TITLE", 5)) {
unsigned char *s1;
/* Collect the text up to the next element into @title, skipping comments.
* xse and xsp mirror se and sp above. */
xse:
s1 = s;
while (s < eof && *s != '<') {
xsp:
s++;
}
if (s - s1)
add_bytes_to_string(title, s1, s - s1);
if (s >= eof) goto se;
if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {
s = skip_comment(s, eof);
goto xse;
}
if (parse_element(s, eof, &name, &namelen, &attr, &s)) {
/* Not an element after all: the '<' becomes part of the title. */
s1 = s;
goto xsp;
}
clr_spaces(title->source);
/* The element that ended the title is handled like any other. */
goto ps;
}
if (strlcasecmp(name, namelen, "META", 4)) goto se;
he = get_attr_val(attr, "charset", options);
if (he) {
/* NOTE(review): no CRLF is appended after the charset value, so it runs
* straight into whatever is added to @head next - confirm this is what
* the consumer of @head expects. */
add_to_string(head, "Charset: ");
add_to_string(head, he);
mem_free(he);
}
he = get_attr_val(attr, "http-equiv", options);
if (!he) goto se;
add_to_string(head, he);
mem_free(he);
c = get_attr_val(attr, "content", options);
if (c) {
add_to_string(head, ": ");
add_to_string(head, c);
mem_free(c);
}
add_crlf_to_string(head);
goto se;
}