mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
cp (codepage). To fix bug 784 html_context->part->document->cp should be passed to get_attr_val instead of html_context->options->cp.
852 lines
20 KiB
C
852 lines
20 KiB
C
/* HTML parser */
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include <errno.h>
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "elinks.h"
|
|
|
|
#include "bfu/listmenu.h"
|
|
#include "bfu/menu.h"
|
|
#include "document/css/apply.h"
|
|
#include "document/css/css.h"
|
|
#include "document/css/stylesheet.h"
|
|
#include "document/html/frames.h"
|
|
#include "document/html/parser/link.h"
|
|
#include "document/html/parser/stack.h"
|
|
#include "document/html/parser/parse.h"
|
|
#include "document/html/parser.h"
|
|
#include "document/html/renderer.h"
|
|
#include "document/options.h"
|
|
#include "document/renderer.h"
|
|
#include "intl/charsets.h"
|
|
#include "protocol/date.h"
|
|
#include "protocol/header.h"
|
|
#include "protocol/uri.h"
|
|
#include "session/task.h"
|
|
#include "terminal/draw.h"
|
|
#include "util/align.h"
|
|
#include "util/box.h"
|
|
#include "util/color.h"
|
|
#include "util/conv.h"
|
|
#include "util/error.h"
|
|
#include "util/memdebug.h"
|
|
#include "util/memlist.h"
|
|
#include "util/memory.h"
|
|
#include "util/string.h"
|
|
|
|
/* Unsafe macros */
|
|
#include "document/html/internal.h"
|
|
|
|
/* TODO: This needs rewrite. Yes, no kidding. */
|
|
|
|
|
|
int
|
|
get_color(struct html_context *html_context, unsigned char *a,
|
|
unsigned char *c, color_T *rgb)
|
|
{
|
|
unsigned char *at;
|
|
int r;
|
|
|
|
if (!use_document_fg_colors(html_context->options))
|
|
return -1;
|
|
|
|
at = get_attr_val(a, c, html_context->options->cp);
|
|
if (!at) return -1;
|
|
|
|
r = decode_color(at, strlen(at), rgb);
|
|
mem_free(at);
|
|
|
|
return r;
|
|
}
|
|
|
|
int
|
|
get_bgcolor(struct html_context *html_context, unsigned char *a, color_T *rgb)
|
|
{
|
|
if (!use_document_bg_colors(html_context->options))
|
|
return -1;
|
|
|
|
return get_color(html_context, a, "bgcolor", rgb);
|
|
}
|
|
|
|
unsigned char *
|
|
get_target(struct document_options *options, unsigned char *a)
|
|
{
|
|
unsigned char *v = get_attr_val(a, "target", options->cp);
|
|
|
|
if (!v) return NULL;
|
|
|
|
if (!*v || !strcasecmp(v, "_self")) {
|
|
mem_free_set(&v, stracpy(options->framename));
|
|
}
|
|
|
|
return v;
|
|
}
|
|
|
|
|
|
void
|
|
ln_break(struct html_context *html_context, int n)
|
|
{
|
|
if (!n || html_top->invisible) return;
|
|
while (n > html_context->line_breax) {
|
|
html_context->line_breax++;
|
|
html_context->line_break_f(html_context);
|
|
}
|
|
html_context->position = 0;
|
|
html_context->putsp = HTML_SPACE_SUPPRESS;
|
|
}
|
|
|
|
void
|
|
put_chrs(struct html_context *html_context, unsigned char *start, int len)
|
|
{
|
|
if (html_is_preformatted())
|
|
html_context->putsp = HTML_SPACE_NORMAL;
|
|
|
|
if (!len || html_top->invisible)
|
|
return;
|
|
|
|
switch (html_context->putsp) {
|
|
case HTML_SPACE_NORMAL:
|
|
break;
|
|
|
|
case HTML_SPACE_ADD:
|
|
html_context->put_chars_f(html_context, " ", 1);
|
|
html_context->position++;
|
|
html_context->putsp = HTML_SPACE_SUPPRESS;
|
|
|
|
/* Fall thru. */
|
|
|
|
case HTML_SPACE_SUPPRESS:
|
|
html_context->putsp = HTML_SPACE_NORMAL;
|
|
if (isspace(start[0])) {
|
|
start++, len--;
|
|
|
|
if (!len) {
|
|
html_context->putsp = HTML_SPACE_SUPPRESS;
|
|
return;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (isspace(start[len - 1]) && !html_is_preformatted())
|
|
html_context->putsp = HTML_SPACE_SUPPRESS;
|
|
html_context->was_br = 0;
|
|
|
|
html_context->put_chars_f(html_context, start, len);
|
|
|
|
html_context->position += len;
|
|
html_context->line_breax = 0;
|
|
if (html_context->was_li > 0)
|
|
html_context->was_li--;
|
|
}
|
|
|
|
void
|
|
set_fragment_identifier(struct html_context *html_context,
|
|
unsigned char *attr_name, unsigned char *attr)
|
|
{
|
|
unsigned char *id_attr;
|
|
|
|
id_attr = get_attr_val(attr_name, attr, html_context->options->cp);
|
|
|
|
if (id_attr) {
|
|
html_context->special_f(html_context, SP_TAG, id_attr);
|
|
mem_free(id_attr);
|
|
}
|
|
}
|
|
|
|
void
|
|
add_fragment_identifier(struct html_context *html_context,
|
|
struct part *part, unsigned char *attr)
|
|
{
|
|
struct part *saved_part = html_context->part;
|
|
|
|
html_context->part = part;
|
|
html_context->special_f(html_context, SP_TAG, attr);
|
|
html_context->part = saved_part;
|
|
}
|
|
|
|
#ifdef CONFIG_CSS
|
|
void
|
|
import_css_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
|
|
unsigned char *url, int len)
|
|
{
|
|
struct html_context *html_context = css->import_data;
|
|
unsigned char *import_url;
|
|
struct uri *uri;
|
|
|
|
assert(html_context);
|
|
assert(base_uri);
|
|
|
|
if (!html_context->options->css_enable
|
|
|| !html_context->options->css_import)
|
|
return;
|
|
|
|
url = memacpy(url, len);
|
|
if (!url) return;
|
|
|
|
/* HTML <head> urls should already be fine but we can.t detect them. */
|
|
import_url = join_urls(base_uri, url);
|
|
mem_free(url);
|
|
|
|
if (!import_url) return;
|
|
|
|
uri = get_uri(import_url, URI_BASE);
|
|
mem_free(import_url);
|
|
|
|
if (!uri) return;
|
|
|
|
/* Request the imported stylesheet as part of the document ... */
|
|
html_context->special_f(html_context, SP_STYLESHEET, uri);
|
|
|
|
/* ... and then attempt to import from the cache. */
|
|
import_css(css, uri);
|
|
|
|
done_uri(uri);
|
|
}
|
|
#endif
|
|
|
|
/* Extract the extra information that is available for elements which can
|
|
* receive focus. Call this from each element which supports tabindex or
|
|
* accesskey. */
|
|
/* Note that in ELinks, we support those attributes (I mean, we call this
|
|
* function) while processing any focusable element (otherwise it'd have zero
|
|
* tabindex, thus messing up navigation between links), thus we support these
|
|
* attributes even near tags where we're not supposed to (like IFRAME, FRAME or
|
|
* LINK). I think this doesn't make any harm ;). --pasky */
|
|
void
|
|
html_focusable(struct html_context *html_context, unsigned char *a)
|
|
{
|
|
struct document_options *options;
|
|
unsigned char *accesskey;
|
|
int cp;
|
|
int tabindex;
|
|
|
|
format.accesskey = 0;
|
|
format.tabindex = 0x80000000;
|
|
|
|
if (!a) return;
|
|
|
|
options = html_context->options;
|
|
cp = options->cp;
|
|
|
|
accesskey = get_attr_val(a, "accesskey", cp);
|
|
if (accesskey) {
|
|
format.accesskey = accesskey_string_to_unicode(accesskey);
|
|
mem_free(accesskey);
|
|
}
|
|
|
|
tabindex = get_num(a, "tabindex", options->cp);
|
|
if (0 < tabindex && tabindex < 32767) {
|
|
format.tabindex = (tabindex & 0x7fff) << 16;
|
|
}
|
|
|
|
mem_free_set(&format.onclick, get_attr_val(a, "onclick", cp));
|
|
mem_free_set(&format.ondblclick, get_attr_val(a, "ondblclick", cp));
|
|
mem_free_set(&format.onmouseover, get_attr_val(a, "onmouseover", cp));
|
|
mem_free_set(&format.onhover, get_attr_val(a, "onhover", cp));
|
|
mem_free_set(&format.onfocus, get_attr_val(a, "onfocus", cp));
|
|
mem_free_set(&format.onmouseout, get_attr_val(a, "onmouseout", cp));
|
|
mem_free_set(&format.onblur, get_attr_val(a, "onblur", cp));
|
|
}
|
|
|
|
void
|
|
html_skip(struct html_context *html_context, unsigned char *a)
|
|
{
|
|
html_top->invisible = 1;
|
|
html_top->type = ELEMENT_DONT_KILL;
|
|
}
|
|
|
|
/* Parse meta refresh without URL= in it:
|
|
* <meta http-equiv="refresh" content="3,http://elinks.or.cz/">
|
|
* <meta http-equiv="refresh" content="3; http://elinks.or.cz/">
|
|
* <meta http-equiv="refresh" content=" 3 ; http://elinks.or.cz/ ">
|
|
*/
|
|
static void
|
|
parse_old_meta_refresh(unsigned char *str, unsigned char **ret)
|
|
{
|
|
unsigned char *p = str;
|
|
int len;
|
|
|
|
assert(str && ret);
|
|
if_assert_failed return;
|
|
|
|
*ret = NULL;
|
|
while (*p && (*p == ' ' || *p == ASCII_TAB)) p++;
|
|
if (!*p) return;
|
|
while (*p && *p >= '0' && *p <= '9') p++;
|
|
if (!*p) return;
|
|
while (*p && (*p == ' ' || *p == ASCII_TAB)) p++;
|
|
if (!*p) return;
|
|
if (*p == ';' || *p == ',') p++; else return;
|
|
while (*p && (*p == ' ' || *p == ASCII_TAB)) p++;
|
|
if (!*p) return;
|
|
|
|
len = strlen(p);
|
|
while (len && (p[len] == ' ' || p[len] == ASCII_TAB)) len--;
|
|
if (len) *ret = memacpy(p, len);
|
|
}
|
|
|
|
void
|
|
process_head(struct html_context *html_context, unsigned char *head)
|
|
{
|
|
unsigned char *refresh, *url;
|
|
|
|
refresh = parse_header(head, "Refresh", NULL);
|
|
if (!refresh) return;
|
|
|
|
parse_header_param(refresh, "URL", &url);
|
|
if (!url) {
|
|
/* Let's try a more tolerant parsing. */
|
|
parse_old_meta_refresh(refresh, &url);
|
|
if (!url) {
|
|
/* If the URL parameter is missing assume that the
|
|
* document being processed should be refreshed. */
|
|
url = get_uri_string(html_context->base_href, URI_ORIGINAL);
|
|
}
|
|
}
|
|
|
|
if (url) {
|
|
/* Extraction of refresh time. */
|
|
unsigned long seconds = 0;
|
|
int valid = 1;
|
|
|
|
/* We try to extract the refresh time, and to handle weird things
|
|
* in an elegant way. Among things we can have negative values,
|
|
* too big ones, just ';' (we assume 0 seconds in that case) and
|
|
* more. */
|
|
if (*refresh != ';') {
|
|
if (isdigit(*refresh)) {
|
|
unsigned long max_seconds = HTTP_REFRESH_MAX_DELAY;
|
|
|
|
errno = 0;
|
|
seconds = strtoul(refresh, NULL, 10);
|
|
if (errno == ERANGE || seconds > max_seconds) {
|
|
/* Too big refresh value, limit it. */
|
|
seconds = max_seconds;
|
|
} else if (errno) {
|
|
/* Bad syntax */
|
|
valid = 0;
|
|
}
|
|
} else {
|
|
/* May be a negative number, or some bad syntax. */
|
|
valid = 0;
|
|
}
|
|
}
|
|
|
|
if (valid) {
|
|
unsigned char *joined_url = join_urls(html_context->base_href, url);
|
|
|
|
html_focusable(html_context, NULL);
|
|
|
|
put_link_line("Refresh: ", url, joined_url,
|
|
html_context->options->framename, html_context);
|
|
html_context->special_f(html_context, SP_REFRESH, seconds, joined_url);
|
|
|
|
mem_free(joined_url);
|
|
}
|
|
|
|
mem_free(url);
|
|
}
|
|
|
|
mem_free(refresh);
|
|
|
|
if (!get_opt_bool("document.cache.ignore_cache_control")) {
|
|
unsigned char *d;
|
|
int no_cache = 0;
|
|
time_t expires = 0;
|
|
|
|
/* XXX: Code duplication with HTTP protocol backend. */
|
|
/* I am not entirely sure in what order we should process these
|
|
* headers and if we should still process Cache-Control max-age
|
|
* if we already set max age to date mentioned in Expires.
|
|
* --jonas */
|
|
if ((d = parse_header(head, "Pragma", NULL))) {
|
|
if (strstr(d, "no-cache")) {
|
|
no_cache = 1;
|
|
}
|
|
mem_free(d);
|
|
}
|
|
|
|
if (!no_cache && (d = parse_header(head, "Cache-Control", NULL))) {
|
|
if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
|
|
no_cache = 1;
|
|
|
|
} else {
|
|
unsigned char *pos = strstr(d, "max-age=");
|
|
|
|
assert(!no_cache);
|
|
|
|
if (pos) {
|
|
/* Grab the number of seconds. */
|
|
timeval_T max_age, seconds;
|
|
|
|
timeval_from_seconds(&seconds, atol(pos + 8));
|
|
timeval_now(&max_age);
|
|
timeval_add_interval(&max_age, &seconds);
|
|
|
|
expires = timeval_to_seconds(&max_age);
|
|
}
|
|
}
|
|
|
|
mem_free(d);
|
|
}
|
|
|
|
if (!no_cache && (d = parse_header(head, "Expires", NULL))) {
|
|
/* Convert date to seconds. */
|
|
if (strstr(d, "now")) {
|
|
timeval_T now;
|
|
|
|
timeval_now(&now);
|
|
expires = timeval_to_seconds(&now);
|
|
} else {
|
|
expires = parse_date(&d, NULL, 0, 1);
|
|
}
|
|
|
|
mem_free(d);
|
|
}
|
|
|
|
if (no_cache)
|
|
html_context->special_f(html_context, SP_CACHE_CONTROL);
|
|
else if (expires)
|
|
html_context->special_f(html_context,
|
|
SP_CACHE_EXPIRES, expires);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
look_for_map(unsigned char **pos, unsigned char *eof, struct uri *uri,
|
|
struct document_options *options)
|
|
{
|
|
unsigned char *al, *attr, *name;
|
|
int namelen;
|
|
|
|
while (*pos < eof && **pos != '<') {
|
|
(*pos)++;
|
|
}
|
|
|
|
if (*pos >= eof) return 0;
|
|
|
|
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
|
|
*pos = skip_comment(*pos, eof);
|
|
return 1;
|
|
}
|
|
|
|
if (parse_element(*pos, eof, &name, &namelen, &attr, pos)) {
|
|
(*pos)++;
|
|
return 1;
|
|
}
|
|
|
|
if (strlcasecmp(name, namelen, "MAP", 3)) return 1;
|
|
|
|
if (uri && uri->fragment) {
|
|
al = get_attr_val(attr, "name", options->cp);
|
|
if (!al) return 1;
|
|
|
|
if (strlcasecmp(al, -1, uri->fragment, uri->fragmentlen)) {
|
|
mem_free(al);
|
|
return 1;
|
|
}
|
|
|
|
mem_free(al);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
look_for_tag(unsigned char **pos, unsigned char *eof,
|
|
unsigned char *name, int namelen, unsigned char **label)
|
|
{
|
|
unsigned char *pos2;
|
|
struct string str;
|
|
|
|
if (!init_string(&str)) {
|
|
/* Is this the right way to bail out? --jonas */
|
|
*pos = eof;
|
|
return 0;
|
|
}
|
|
|
|
pos2 = *pos;
|
|
while (pos2 < eof && *pos2 != '<') {
|
|
pos2++;
|
|
}
|
|
|
|
if (pos2 >= eof) {
|
|
done_string(&str);
|
|
*pos = eof;
|
|
return 0;
|
|
}
|
|
if (pos2 - *pos)
|
|
add_bytes_to_string(&str, *pos, pos2 - *pos);
|
|
*label = str.source;
|
|
|
|
*pos = pos2;
|
|
|
|
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
|
|
*pos = skip_comment(*pos, eof);
|
|
return 1;
|
|
}
|
|
|
|
if (parse_element(*pos, eof, NULL, NULL, NULL, &pos2)) return 1;
|
|
|
|
if (strlcasecmp(name, namelen, "A", 1)
|
|
&& strlcasecmp(name, namelen, "/A", 2)
|
|
&& strlcasecmp(name, namelen, "MAP", 3)
|
|
&& strlcasecmp(name, namelen, "/MAP", 4)
|
|
&& strlcasecmp(name, namelen, "AREA", 4)
|
|
&& strlcasecmp(name, namelen, "/AREA", 5)) {
|
|
*pos = pos2;
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
look_for_link(unsigned char **pos, unsigned char *eof, struct menu_item **menu,
|
|
struct memory_list **ml, struct uri *href_base,
|
|
unsigned char *target_base, struct conv_table *ct,
|
|
struct document_options *options)
|
|
{
|
|
unsigned char *attr, *href, *name, *target;
|
|
unsigned char *label = NULL; /* shut up warning */
|
|
struct link_def *ld;
|
|
struct menu_item *nm;
|
|
int nmenu;
|
|
int namelen;
|
|
|
|
while (*pos < eof && **pos != '<') {
|
|
(*pos)++;
|
|
}
|
|
|
|
if (*pos >= eof) return 0;
|
|
|
|
if (*pos + 2 <= eof && ((*pos)[1] == '!' || (*pos)[1] == '?')) {
|
|
*pos = skip_comment(*pos, eof);
|
|
return 1;
|
|
}
|
|
|
|
if (parse_element(*pos, eof, &name, &namelen, &attr, pos)) {
|
|
(*pos)++;
|
|
return 1;
|
|
}
|
|
|
|
if (!strlcasecmp(name, namelen, "A", 1)) {
|
|
while (look_for_tag(pos, eof, name, namelen, &label));
|
|
|
|
if (*pos >= eof) return 0;
|
|
|
|
} else if (!strlcasecmp(name, namelen, "AREA", 4)) {
|
|
unsigned char *alt = get_attr_val(attr, "alt", options->cp);
|
|
|
|
if (alt) {
|
|
label = convert_string(ct, alt, strlen(alt),
|
|
options->cp, CSM_DEFAULT,
|
|
NULL, NULL, NULL);
|
|
mem_free(alt);
|
|
} else {
|
|
label = NULL;
|
|
}
|
|
|
|
} else if (!strlcasecmp(name, namelen, "/MAP", 4)) {
|
|
/* This is the only successful return from here! */
|
|
add_to_ml(ml, *menu, NULL);
|
|
return 0;
|
|
|
|
} else {
|
|
return 1;
|
|
}
|
|
|
|
target = get_target(options, attr);
|
|
if (!target) target = stracpy(empty_string_or_(target_base));
|
|
if (!target) {
|
|
mem_free_if(label);
|
|
return 1;
|
|
}
|
|
|
|
ld = mem_alloc(sizeof(*ld));
|
|
if (!ld) {
|
|
mem_free_if(label);
|
|
mem_free(target);
|
|
return 1;
|
|
}
|
|
|
|
href = get_url_val(attr, "href", options->cp);
|
|
if (!href) {
|
|
mem_free_if(label);
|
|
mem_free(target);
|
|
mem_free(ld);
|
|
return 1;
|
|
}
|
|
|
|
|
|
ld->link = join_urls(href_base, href);
|
|
mem_free(href);
|
|
if (!ld->link) {
|
|
mem_free_if(label);
|
|
mem_free(target);
|
|
mem_free(ld);
|
|
return 1;
|
|
}
|
|
|
|
|
|
ld->target = target;
|
|
for (nmenu = 0; !mi_is_end_of_menu(&(*menu)[nmenu]); nmenu++) {
|
|
struct link_def *ll = (*menu)[nmenu].data;
|
|
|
|
if (!strcmp(ll->link, ld->link) &&
|
|
!strcmp(ll->target, ld->target)) {
|
|
mem_free(ld->link);
|
|
mem_free(ld->target);
|
|
mem_free(ld);
|
|
mem_free_if(label);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (label) {
|
|
clr_spaces(label);
|
|
|
|
if (!*label) {
|
|
mem_free(label);
|
|
label = NULL;
|
|
}
|
|
}
|
|
|
|
if (!label) {
|
|
label = stracpy(ld->link);
|
|
if (!label) {
|
|
mem_free(target);
|
|
mem_free(ld->link);
|
|
mem_free(ld);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
nm = mem_realloc(*menu, (nmenu + 2) * sizeof(*nm));
|
|
if (nm) {
|
|
*menu = nm;
|
|
memset(&nm[nmenu], 0, 2 * sizeof(*nm));
|
|
nm[nmenu].text = label;
|
|
nm[nmenu].func = map_selected;
|
|
nm[nmenu].data = ld;
|
|
nm[nmenu].flags = NO_INTL;
|
|
}
|
|
|
|
add_to_ml(ml, ld, ld->link, ld->target, label, NULL);
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
int
|
|
get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof,
|
|
struct menu_item **menu, struct memory_list **ml, struct uri *uri,
|
|
struct document_options *options, unsigned char *target_base,
|
|
int to, int def, int hdef)
|
|
{
|
|
struct conv_table *ct;
|
|
struct string hd;
|
|
|
|
if (!init_string(&hd)) return -1;
|
|
|
|
if (head) add_to_string(&hd, head);
|
|
scan_http_equiv(pos, eof, &hd, NULL, options);
|
|
ct = get_convert_table(hd.source, to, def, NULL, NULL, hdef);
|
|
done_string(&hd);
|
|
|
|
*menu = mem_calloc(1, sizeof(**menu));
|
|
if (!*menu) return -1;
|
|
|
|
while (look_for_map(&pos, eof, uri, options));
|
|
|
|
if (pos >= eof) {
|
|
mem_free(*menu);
|
|
return -1;
|
|
}
|
|
|
|
*ml = NULL;
|
|
|
|
while (look_for_link(&pos, eof, menu, ml, uri, target_base, ct, options))
|
|
;
|
|
|
|
if (pos >= eof) {
|
|
freeml(*ml);
|
|
mem_free(*menu);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
struct html_element *
|
|
init_html_parser_state(struct html_context *html_context,
|
|
enum html_element_mortality_type type,
|
|
int align, int margin, int width)
|
|
{
|
|
html_stack_dup(html_context, type);
|
|
|
|
par_format.align = align;
|
|
|
|
if (type <= ELEMENT_IMMORTAL) {
|
|
par_format.leftmargin = margin;
|
|
par_format.rightmargin = margin;
|
|
par_format.width = width;
|
|
par_format.list_level = 0;
|
|
par_format.list_number = 0;
|
|
par_format.dd_margin = 0;
|
|
html_top->namelen = 0;
|
|
}
|
|
|
|
return html_top;
|
|
}
|
|
|
|
|
|
|
|
void
|
|
done_html_parser_state(struct html_context *html_context,
|
|
struct html_element *element)
|
|
{
|
|
html_context->line_breax = 1;
|
|
|
|
while (html_top != element) {
|
|
pop_html_element(html_context);
|
|
#if 0
|
|
/* I've preserved this bit to show an example of the Old Code
|
|
* of the Mikulas days (I _HOPE_ it's by Mikulas, at least ;-).
|
|
* I think this assert() can never fail, for one. --pasky */
|
|
assertm(html_top && (void *) html_top != (void *) &html_stack,
|
|
"html stack trashed");
|
|
if_assert_failed break;
|
|
#endif
|
|
}
|
|
|
|
html_top->type = ELEMENT_KILLABLE;
|
|
pop_html_element(html_context);
|
|
|
|
}
|
|
|
|
struct html_context *
|
|
init_html_parser(struct uri *uri, struct document_options *options,
|
|
unsigned char *start, unsigned char *end,
|
|
struct string *head, struct string *title,
|
|
void (*put_chars)(struct html_context *, unsigned char *, int),
|
|
void (*line_break)(struct html_context *),
|
|
void *(*special)(struct html_context *, enum html_special_type, ...))
|
|
{
|
|
struct html_context *html_context;
|
|
struct html_element *e;
|
|
|
|
assert(uri && options);
|
|
if_assert_failed return NULL;
|
|
|
|
html_context = mem_calloc(1, sizeof(*html_context));
|
|
if (!html_context) return NULL;
|
|
|
|
#ifdef CONFIG_CSS
|
|
html_context->css_styles.import = import_css_stylesheet;
|
|
init_list(html_context->css_styles.selectors);
|
|
#endif
|
|
|
|
init_list(html_context->stack);
|
|
|
|
html_context->startf = start;
|
|
html_context->put_chars_f = put_chars;
|
|
html_context->line_break_f = line_break;
|
|
html_context->special_f = special;
|
|
|
|
html_context->base_href = get_uri_reference(uri);
|
|
html_context->base_target = null_or_stracpy(options->framename);
|
|
|
|
html_context->options = options;
|
|
|
|
scan_http_equiv(start, end, head, title, options);
|
|
|
|
e = mem_calloc(1, sizeof(*e));
|
|
if (!e) return NULL;
|
|
add_to_list(html_context->stack, e);
|
|
|
|
format.style.attr = 0;
|
|
format.fontsize = 3;
|
|
format.link = format.target = format.image = NULL;
|
|
format.onclick = format.ondblclick = format.onmouseover = format.onhover
|
|
= format.onfocus = format.onmouseout = format.onblur = NULL;
|
|
format.select = NULL;
|
|
format.form = NULL;
|
|
format.title = NULL;
|
|
|
|
format.style.fg = options->default_fg;
|
|
format.style.bg = options->default_bg;
|
|
format.clink = options->default_link;
|
|
format.vlink = options->default_vlink;
|
|
#ifdef CONFIG_BOOKMARKS
|
|
format.bookmark_link = options->default_bookmark_link;
|
|
#endif
|
|
format.image_link = options->default_image_link;
|
|
|
|
par_format.align = ALIGN_LEFT;
|
|
par_format.leftmargin = options->margin;
|
|
par_format.rightmargin = options->margin;
|
|
|
|
par_format.width = options->box.width;
|
|
par_format.list_level = par_format.list_number = 0;
|
|
par_format.dd_margin = options->margin;
|
|
par_format.flags = P_NONE;
|
|
|
|
par_format.bgcolor = options->default_bg;
|
|
|
|
html_top->invisible = 0;
|
|
html_top->name = NULL;
|
|
html_top->namelen = 0;
|
|
html_top->options = NULL;
|
|
html_top->linebreak = 1;
|
|
html_top->type = ELEMENT_DONT_KILL;
|
|
|
|
html_context->has_link_lines = 0;
|
|
html_context->table_level = 0;
|
|
|
|
#ifdef CONFIG_CSS
|
|
html_context->css_styles.import_data = html_context;
|
|
|
|
if (options->css_enable)
|
|
mirror_css_stylesheet(&default_stylesheet,
|
|
&html_context->css_styles);
|
|
#endif
|
|
|
|
return html_context;
|
|
}
|
|
|
|
void
|
|
done_html_parser(struct html_context *html_context)
|
|
{
|
|
#ifdef CONFIG_CSS
|
|
if (html_context->options->css_enable)
|
|
done_css_stylesheet(&html_context->css_styles);
|
|
#endif
|
|
|
|
mem_free(html_context->base_target);
|
|
done_uri(html_context->base_href);
|
|
|
|
kill_html_stack_item(html_context, html_context->stack.next);
|
|
|
|
assertm(list_empty(html_context->stack),
|
|
"html stack not empty after operation");
|
|
if_assert_failed init_list(html_context->stack);
|
|
|
|
mem_free(html_context);
|
|
}
|