1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-06-15 23:35:34 +00:00

CONFIG_DOM_HTML - Introduce stub for experimental DOM-based HTML parser

Currently living in document/html/dom.*, this aims to be an alternative
to Mikulas' original HTML parser, using the experimental DOM engine.
It does not aim to replace Mikulas' HTML renderer just yet.

It is shoehorned into the existing infrastructure rather uglily for now; it
will need more hacking to make this a bit cleaner. But surprisingly, only three
files outside of document/html needed ifdefs, two of them in document/. Nifty!
This commit is contained in:
Petr Baudis 2007-08-29 00:12:24 +02:00 committed by Petr Baudis
parent 6b05ddb762
commit dc711228b6
13 changed files with 334 additions and 28 deletions

View File

@ -129,6 +129,7 @@ CONFIG_GOPHER = @CONFIG_GOPHER@
CONFIG_GPM = @CONFIG_GPM@
CONFIG_GZIP = @CONFIG_GZIP@
CONFIG_HTML_HIGHLIGHT = @CONFIG_HTML_HIGHLIGHT@
CONFIG_DOM_HTML = @CONFIG_DOM_HTML@
CONFIG_IDN = @CONFIG_IDN@
CONFIG_INTERLINK = @CONFIG_INTERLINK@
CONFIG_IPV6 = @CONFIG_IPV6@

View File

@ -1310,9 +1310,12 @@ EL_ARG_ENABLE(CONFIG_CSS, css, [Cascading Style Sheets],
EL_ARG_DEPEND(CONFIG_HTML_HIGHLIGHT, html-highlight, [CONFIG_CSS:yes], [HTML highlighting],
[ --enable-html-highlight HTML highlighting using DOM engine])
EL_ARG_DEPEND(CONFIG_DOM_HTML, dom-html, [CONFIG_CSS:yes], [DOM-based HTML parser],
[ --enable-dom-html enable highly experimental DOM-based HTML parser])
dnl Everything in the tree already uses CONFIG_DOM
dnl so resolve CONFIG_HTML_HIGHLIGHT to CONFIG_DOM
EL_CONFIG_DEPENDS(CONFIG_DOM, [CONFIG_HTML_HIGHLIGHT], [DOM engine])
dnl so resolve the DOM-dependent options to CONFIG_DOM
EL_CONFIG_DEPENDS(CONFIG_DOM, [CONFIG_HTML_HIGHLIGHT CONFIG_DOM_HTML], [DOM engine])
EL_ARG_DEPEND(CONFIG_BACKTRACE, backtrace, [HAVE_EXECINFO:yes], [Backtrace],
[ --disable-backtrace disable backtrace support])

View File

@ -414,6 +414,28 @@ CONFIG_CSS=yes
CONFIG_HTML_HIGHLIGHT=no
### Highly experimental DOM-based HTML parser
#
# Currently, ELinks still uses an enhanced version of the original Links HTML
# parser by Mikulas Patocka. The parser is quite fast, well tested and can
# handle all sorts of legacy HTML. However, it is not written very cleanly,
# it does not allow for support of the box model and advanced styling, and it
# does not provide a DOM model of the HTML document, which is required by
# much advanced JavaScript code.
#
# This option enables an alternative HTML parser written from scratch and
# using the also-experimental DOM implementation. The parser probably still
# doesn't really do anything useful and it is totally unsuitable for normal
# browsing. The original HTML renderer is still used (for now).
#
# Do not enable this option unless you are an interested developer!
#
# Default: disabled, requires that CSS is enabled and you know what you are
# doing
CONFIG_DOM_HTML=no
### ECMAScript (JavaScript) Browser Scripting
#
# By enabling this feature, certain parts of ELinks, such as the goto URL

View File

@ -22,7 +22,9 @@
#include "document/css/scanner.h"
#include "document/css/stylesheet.h"
#include "document/format.h"
#ifndef CONFIG_DOM_HTML
#include "document/html/parser/parse.h"
#endif
#include "document/options.h"
#include "util/align.h"
#include "util/color.h"
@ -35,6 +37,8 @@
#include "document/html/internal.h"
#ifndef CONFIG_DOM_HTML
typedef void (*css_applier_T)(struct html_context *html_context,
struct html_element *element,
struct css_property *prop);
@ -297,3 +301,34 @@ css_apply(struct html_context *html_context, struct html_element *element,
done_css_selector(selector);
}
#else
/* Placeholder used when the DOM-based HTML parser is configured: looking up
 * the CSS selector for an element is not implemented in that configuration.
 *
 * None of the arguments are examined. The call is flagged via INTERNAL() and
 * NULL is returned unconditionally. */
struct css_selector *
get_css_selector_for_element(struct html_context *html_context,
			     struct html_element *element,
			     struct css_stylesheet *css,
			     LIST_OF(struct html_element) *html_stack)
{
	struct css_selector *no_selector = NULL;

	INTERNAL("Cannot get CSS selector for DOM HTML engine yet");

	return no_selector;
}
/* Placeholder used when the DOM-based HTML parser is configured: applying a
 * selector's style to an element is not implemented in that configuration.
 *
 * None of the arguments are examined; the call is only flagged via
 * INTERNAL(). */
void
apply_css_selector_style(struct html_context *html_context,
			 struct html_element *element,
			 struct css_selector *selector)
{
	/* Nothing can be applied yet, just make the attempt visible. */
	INTERNAL("Cannot apply CSS selector for DOM HTML engine yet");
}
/* Placeholder used when the DOM-based HTML parser is configured: applying a
 * stylesheet to an element is not implemented in that configuration.
 *
 * None of the arguments are examined; the call is only flagged via
 * INTERNAL(). */
void
css_apply(struct html_context *html_context,
	  struct html_element *element,
	  struct css_stylesheet *css,
	  LIST_OF(struct html_element) *html_stack)
{
	/* Nothing can be applied yet, just make the attempt visible. */
	INTERNAL("Cannot apply CSS for DOM HTML engine yet");
}
#endif

View File

@ -17,8 +17,9 @@
#include "document/document.h"
#include "document/forms.h"
#include "document/html/frames.h"
#include "document/html/parser.h"
#ifndef CONFIG_DOM_HTML
#include "document/html/parser/parse.h"
#endif
#include "document/html/renderer.h"
#include "document/options.h"
#include "document/refresh.h"
@ -332,13 +333,17 @@ get_format_cache_refresh_count(void)
/* Sets up the HTML tag lookup. When the DOM-based HTML parser is configured
 * the lookup is not compiled in, so this does nothing at all.
 * @module is not used. */
static void
init_documents(struct module *module)
{
#if !defined(CONFIG_DOM_HTML)
	init_tags_lookup();
#endif
}
/* Releases the HTML tag lookup (only when the DOM-based HTML parser is not
 * configured) and then, in every configuration, the table cache.
 * @module is not used. */
static void
done_documents(struct module *module)
{
#if !defined(CONFIG_DOM_HTML)
	free_tags_lookup();
#endif

	free_table_cache();
}

View File

@ -1,7 +1,13 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
SUBDIRS = parser
OBJS = frames.o parser.o renderer.o tables.o
# The original HTML parser
SUBDIRS-unless$(CONFIG_DOM_HTML) = parser
OBJS-unless$(CONFIG_DOM_HTML) = parser.o tables.o
# The DOM-based HTML parser
OBJS-$(CONFIG_DOM_HTML) = dom.o
OBJS = frames.o renderer.o
include $(top_srcdir)/Makefile.lib

149
src/document/html/dom.c Normal file
View File

@ -0,0 +1,149 @@
/* Experimental DOM-based HTML parser */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#include "elinks.h"
#include "document/css/css.h"
#include "document/css/stylesheet.h"
#include "document/html/dom.h"
#include "document/html/renderer.h"
#include "document/options.h"
#include "dom/configuration.h"
#include "dom/scanner.h"
#include "dom/sgml/parser.h"
#include "dom/sgml/html/html.h"
#include "dom/sgml/rss/rss.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
#include "document/html/internal.h"
#ifdef CONFIG_CSS
/* Import hook for stylesheets referenced from a document.
 *
 * @css		stylesheet doing the import; its import_data must point to the
 *		struct html_context of the document
 * @base_uri	URI the referenced URL is resolved against
 * @url		the referenced URL, @len bytes long
 *
 * Returns without doing anything when CSS or CSS imports are disabled in the
 * document options, or when any intermediate step yields NULL. */
void
import_css_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
		      unsigned char *url, int len)
{
	struct html_context *html_context = css->import_data;
	unsigned char *url_copy;
	unsigned char *joined;
	struct uri *import_uri;

	assert(html_context);
	assert(base_uri);

	if (!(html_context->options->css_enable
	      && html_context->options->css_import))
		return;

	url_copy = memacpy(url, len);
	if (!url_copy) return;

	/* HTML <head> urls should already be fine but we can't detect them. */
	joined = join_urls(base_uri, url_copy);
	mem_free(url_copy);
	if (!joined) return;

	import_uri = get_uri(joined, URI_BASE);
	mem_free(joined);
	if (!import_uri) return;

	/* First request the imported stylesheet as part of the document ... */
	html_context->special_f(html_context, SP_STYLESHEET, import_uri);

	/* ... then attempt to import it from the cache. */
	import_css(css, import_uri);

	done_uri(import_uri);
}
#endif
/* Allocates and fills in the HTML context for the DOM-based parser stub.
 *
 * @uri		document URI; a reference to it becomes the context's base_href
 * @options	document options; stored in the context (not copied) and the
 *		source of base_target (options->framename)
 * @start, @end, @head
 *		not used by this stub
 * @title	initialised with init_string()
 * @put_chars, @line_break, @special
 *		callbacks stored in the context
 *
 * Returns the new context, or NULL when the argument assertion fails or the
 * context cannot be allocated. */
struct html_context *
init_html_parser(struct uri *uri, struct document_options *options,
		 unsigned char *start, unsigned char *end,
		 struct string *head, struct string *title,
		 void (*put_chars)(struct html_context *,
				   unsigned char *, int),
		 void (*line_break)(struct html_context *),
		 void *(*special)(struct html_context *,
				  enum html_special_type, ...))
{
	struct html_context *ctx;

	assert(uri && options);
	if_assert_failed return NULL;

	ctx = mem_calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	init_string(title);

#ifdef CONFIG_CSS
	ctx->css_styles.import = import_css_stylesheet;
	init_css_selector_set(&ctx->css_styles.selectors);
#endif

	/* Renderer callbacks. */
	ctx->put_chars_f = put_chars;
	ctx->line_break_f = line_break;
	ctx->special_f = special;

	/* Document-wide state. */
	ctx->base_href = get_uri_reference(uri);
	ctx->base_target = null_or_stracpy(options->framename);
	ctx->options = options;
	ctx->table_level = 0;

#ifdef CONFIG_CSS
	ctx->css_styles.import_data = ctx;

	/* Done last, once the rest of the context has been filled in. */
	if (options->css_enable) {
		mirror_css_stylesheet(&default_stylesheet,
				      &ctx->css_styles);
	}
#endif

	return ctx;
}
/* Releases everything init_html_parser() attached to @html_context and then
 * the context itself. The options the context points to are not freed here.
 *
 * The context's stylesheet is torn down only when CSS is enabled in the
 * document options. */
void
done_html_parser(struct html_context *html_context)
{
#ifdef CONFIG_CSS
	if (html_context->options->css_enable)
		done_css_stylesheet(&html_context->css_styles);
#endif

	/* base_target comes from null_or_stracpy() and thus may legitimately
	 * be NULL; do not hand a NULL pointer to mem_free(). */
	if (html_context->base_target)
		mem_free(html_context->base_target);

	done_uri(html_context->base_href);
	mem_free(html_context);
}
/* Stub: the DOM-based parser keeps no per-part parser state yet.
 *
 * All arguments are ignored and the returned state handle is always NULL. */
void *
init_html_parser_state(struct html_context *html_context,
		       enum html_element_mortality_type type,
		       int align, int margin, int width)
{
	void *state = NULL;

	return state;
}
/* Stub counterpart of init_html_parser_state(): there is no state to tear
 * down yet, so both arguments are ignored. */
void
done_html_parser_state(struct html_context *html_context, void *state)
{
	/* Intentionally empty. */
}
/* Stub: the DOM-based parser does not process the document yet.
 *
 * All arguments are ignored and nothing is produced. */
void
parse_html(unsigned char *html, unsigned char *eof,
	   struct part *part, unsigned char *head,
	   struct html_context *html_context)
{
	/* Intentionally empty. */
}

32
src/document/html/dom.h Normal file
View File

@ -0,0 +1,32 @@
/* Interface of the experimental DOM-based HTML parser. */

#ifndef EL__DOCUMENT_HTML_DOM_H
#define EL__DOCUMENT_HTML_DOM_H

/* Only pointers to these types appear below, so forward declarations are
 * enough. struct uri is declared here as well, so that the prototype of
 * init_html_parser() does not depend on what was included before. */
struct document_options;
struct html_context;
struct part;
struct string;
struct uri;

/* NOTE(review): ISO C does not allow referring to an enum before its
 * definition; these two lines rely on a compiler extension. */
enum html_special_type;
enum html_element_mortality_type;

/* Creates the HTML context. The three callbacks are stored in the context.
 * Returns NULL on failure. */
struct html_context *init_html_parser(struct uri *uri,
				      struct document_options *options,
				      unsigned char *start, unsigned char *end,
				      struct string *head, struct string *title,
				      void (*put_chars)(struct html_context *,
							unsigned char *, int),
				      void (*line_break)(struct html_context *),
				      void *(*special)(struct html_context *,
						       enum html_special_type, ...));

/* Releases a context obtained from init_html_parser(). */
void done_html_parser(struct html_context *html_context);

/* Parser state is passed around as an opaque pointer. */
void *init_html_parser_state(struct html_context *html_context,
			     enum html_element_mortality_type type,
			     int align, int margin, int width);
void done_html_parser_state(struct html_context *html_context,
			    void *state);

void parse_html(unsigned char *html, unsigned char *eof, struct part *part,
		unsigned char *head, struct html_context *html_context);

#endif

View File

@ -3,17 +3,38 @@
#define EL__DOCUMENT_HTML_INTERNAL_H
#include "document/css/stylesheet.h"
#define INSIDE_INTERNAL_H
#include "document/html/parser.h"
#undef INSIDE_INTERNAL_H
#include "util/lists.h"
struct document_options;
struct uri;
#ifndef CONFIG_DOM_CSS
/* For parser/parse.c: */
void process_head(struct html_context *html_context, unsigned char *head);
void put_chrs(struct html_context *html_context, unsigned char *start, int len);
/* For parser/link.c: */
void html_focusable(struct html_context *html_context, unsigned char *a);
void html_skip(struct html_context *html_context, unsigned char *a);
unsigned char *get_target(struct document_options *options, unsigned char *a);
void
import_css_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
unsigned char *url, int len);
#endif
/* The HTML parser context. It is also heavily used by the renderer so DOM
* parser must use it as well. */
#ifndef CONFIG_DOM_CSS
enum html_whitespace_state {
/* Either we are starting a new "block" or the last segment of the
* current "block" is ending with whitespace and we should eat any
@ -35,6 +56,7 @@ enum html_whitespace_state {
* put_chrs(" "). That needs more investigation yet. --pasky */
HTML_SPACE_ADD,
};
#endif
struct html_context {
#ifdef CONFIG_CSS
@ -55,6 +77,10 @@ struct html_context {
* It is copied here because part->document is NULL sometimes. */
int doc_cp;
#ifdef CONFIG_DOM_HTML
struct text_attrib attr;
struct par_attrib parattr;
#else
/* For:
* html/parser/parse.c
* html/parser/stack.c
@ -80,11 +106,12 @@ struct html_context {
unsigned int was_body:1; /* For META refresh inside <body>. */
unsigned int was_body_background:1; /* For <HTML> with style. */
/* For html/parser.c, html/renderer.c */
int margin;
/* For parser/forms.c: */
unsigned char *startf;
#endif
/* For html/parser.c, html/renderer.c */
int margin;
/* For:
* html/parser/parse.c
@ -98,14 +125,17 @@ struct html_context {
* html/parser/link.c
* html/parser/parse.c
* html/parser/stack.c
* html/parser.c */
* html/parser.c
* html/dom.c
* html/renderer.c */
struct part *part;
/* For:
* html/parser/forms.c
* html/parser/link.c
* html/parser/parse.c
* html/parser.c */
* html/parser.c
* html/dom.c */
/* Note that this is for usage by put_chrs only; anywhere else in
* the parser, one should use put_chrs. */
void (*put_chars_f)(struct html_context *, unsigned char *, int);
@ -115,34 +145,33 @@ struct html_context {
* html/parser/link.c
* html/parser/parse.c
* html/parser/stack.c
* html/parser.c */
* html/parser.c
* html/dom.c */
void (*line_break_f)(struct html_context *);
/* For:
* html/parser/forms.c
* html/parser/parse.c
* html/parser.c */
* html/parser.c
* html/dom.c */
void *(*special_f)(struct html_context *, enum html_special_type, ...);
};
#ifdef CONFIG_DOM_HTML
#define format (html_context->attr)
#define par_format (html_context->parattr)
#else
#define html_top ((struct html_element *) html_context->stack.next)
#define html_bottom ((struct html_element *) html_context->stack.prev)
#define format (html_top->attr)
#define par_format (html_top->parattr)
#endif
#define html_is_preformatted() (format.style.attr & AT_PREFORMATTED)
#ifndef CONFIG_DOM_HTML
#define get_html_max_width() \
int_max(par_format.width - (par_format.leftmargin + par_format.rightmargin), 0)
/* For parser/link.c: */
void html_focusable(struct html_context *html_context, unsigned char *a);
void html_skip(struct html_context *html_context, unsigned char *a);
unsigned char *get_target(struct document_options *options, unsigned char *a);
void
import_css_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
unsigned char *url, int len);
#endif
#endif

View File

@ -702,7 +702,7 @@ get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof,
struct html_element *
void *
init_html_parser_state(struct html_context *html_context,
enum html_element_mortality_type type,
int align, int margin, int width)
@ -725,11 +725,10 @@ init_html_parser_state(struct html_context *html_context,
}
void
done_html_parser_state(struct html_context *html_context,
struct html_element *element)
done_html_parser_state(struct html_context *html_context, void *state)
{
struct html_element *element = state;
html_context->line_breax = 1;
while (html_top != element) {
@ -746,9 +745,10 @@ done_html_parser_state(struct html_context *html_context,
html_top->type = ELEMENT_KILLABLE;
pop_html_element(html_context);
}
/* This function does not set html_context.doc_cp = document.cp,
* because it does not know the document, and because the codepage has
* not even been decided when it is called. */

View File

@ -8,6 +8,12 @@
#include "util/color.h"
#include "util/lists.h"
#ifndef INSIDE_INTERNAL_H
#ifdef CONFIG_DOM_HTML
#error html/parser.h included even though DOM parser is configured to use!
#endif
#endif
struct document_options;
struct form_control;
struct frameset_desc;
@ -112,6 +118,12 @@ enum html_element_mortality_type {
ELEMENT_WEAK,
};
#ifdef CONFIG_DOM_HTML
struct html_element;
#else
struct html_element {
LIST_HEAD(struct html_element);
@ -183,3 +195,5 @@ void ln_break(struct html_context *html_context, int n);
int get_color(struct html_context *html_context, unsigned char *a, unsigned char *c, color_T *rgb);
#endif
#endif

View File

@ -15,10 +15,14 @@
#include "document/docdata.h"
#include "document/document.h"
#include "document/html/frames.h"
#ifdef CONFIG_DOM_HTML
#include "document/html/dom.h"
#else
#include "document/html/parser.h"
#include "document/html/parser/parse.h"
#include "document/html/renderer.h"
#include "document/html/tables.h"
#endif
#include "document/html/renderer.h"
#include "document/options.h"
#include "document/refresh.h"
#include "document/renderer.h"

View File

@ -17,7 +17,9 @@
#include "dialogs/menu.h"
#include "dialogs/status.h"
#include "document/document.h"
#ifndef CONFIG_DOM_HTML
#include "document/html/parser.h"
#endif
#include "document/refresh.h"
#include "document/view.h"
#include "intl/gettext/libintl.h"
@ -367,6 +369,9 @@ x:
static void
ses_imgmap(struct session *ses)
{
#ifdef CONFIG_DOM_HTML
INTERNAL("Image maps not supported with DOM HTML parser");
#else
struct cache_entry *cached = find_in_cache(ses->loading_uri);
struct document_view *doc_view = current_frame(ses);
struct fragment *fragment;
@ -395,6 +400,7 @@ ses_imgmap(struct session *ses)
add_empty_window(ses->tab->term, (void (*)(void *)) freeml, ml);
do_menu(ses->tab->term, menu, ses, 0);
#endif
}
enum do_move {