0
0
mirror of https://github.com/rkd77/elinks.git synced 2025-06-30 22:19:29 -04:00

Merge with dom-parser-stream

This commit is contained in:
Jonas Fonseca 2005-12-05 19:41:56 +01:00 committed by Jonas Fonseca
commit 1d24d549e4
7 changed files with 202 additions and 155 deletions

View File

@ -28,6 +28,7 @@
#include "util/box.h" #include "util/box.h"
#include "util/error.h" #include "util/error.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/scanner.h"
#include "util/snprintf.h" #include "util/snprintf.h"
#include "util/string.h" #include "util/string.h"
@ -387,7 +388,7 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
static struct dom_node * static struct dom_node *
render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
struct screen_char *template = &renderer->styles[node->type]; struct screen_char *template = &renderer->styles[node->type];
unsigned char *name, *value; unsigned char *name, *value;
@ -407,7 +408,7 @@ render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
static struct dom_node * static struct dom_node *
render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
struct document *document = renderer->document; struct document *document = renderer->document;
struct screen_char *template = &renderer->styles[node->type]; struct screen_char *template = &renderer->styles[node->type];
unsigned char *name, *value, *id; unsigned char *name, *value, *id;
@ -430,7 +431,7 @@ render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *da
static struct dom_node * static struct dom_node *
render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
struct document *document = renderer->document; struct document *document = renderer->document;
struct screen_char *template = &renderer->styles[node->type]; struct screen_char *template = &renderer->styles[node->type];
unsigned char *name, *value; unsigned char *name, *value;
@ -452,7 +453,7 @@ render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
static struct dom_node * static struct dom_node *
render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
struct document *document = renderer->document; struct document *document = renderer->document;
struct screen_char *template = &renderer->styles[node->type]; struct screen_char *template = &renderer->styles[node->type];
unsigned char *name, *id; unsigned char *name, *id;
@ -536,7 +537,7 @@ render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template
static struct dom_node * static struct dom_node *
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
assert(node && renderer && renderer->document); assert(node && renderer && renderer->document);
@ -550,7 +551,7 @@ render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *dat
static struct dom_node * static struct dom_node *
render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
unsigned char *value; unsigned char *value;
int valuelen; int valuelen;
@ -577,7 +578,7 @@ render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, voi
static struct dom_node * static struct dom_node *
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_renderer *renderer = stack->data; struct dom_renderer *renderer = stack->renderer;
assert(node && renderer && renderer->document); assert(node && renderer && renderer->document);
@ -587,27 +588,61 @@ render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *
} }
static struct dom_node * static struct dom_node *
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data) render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct dom_stack_state *state = get_dom_stack_parent(stack); struct dom_renderer *renderer = stack->renderer;
struct dom_renderer *renderer = stack->data; struct sgml_parser_state *pstate = data;
struct screen_char *template = &renderer->styles[node->type]; struct scanner_token *token = &pstate->end_token;
struct dom_node *attribute = NULL; unsigned char *string = token->string;
int i; int length = token->length;
assert(node && renderer->document); assert(node && renderer && renderer->document);
assert(state && state->list);
/* The attributes are sorted but we want them in the original order */ if (!string || !length)
foreach_dom_node(i, node, state->list) { return node;
if (node->string >= renderer->position
&& (!attribute || node->string < attribute->string)) if (check_dom_node_source(renderer, string, length)) {
attribute = node; render_dom_flush(renderer, string);
renderer->position = string + length;
assert_source(renderer, renderer->position, 0);
} }
assert(attribute); render_dom_text(renderer, &renderer->styles[node->type], string, length);
node = attribute;
return node;
}
static struct dom_node *
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->renderer;
struct screen_char *template = &renderer->styles[node->type];
assert(node && renderer->document);
#if 0
/* Disabled since the DOM source highlighter uses the stream parser and
* therefore the attributes are pushed to it in order. However, if/when
* we support rendering (read: saving) of loaded DOM trees, this one
* small hack is needed to get the attributes in the original order. */
{
struct dom_stack_state *state = get_dom_stack_parent(stack);
struct dom_node *attribute = NULL;
int i;
assert(state && state->list);
/* The attributes are sorted but we want them in the original order */
foreach_dom_node(i, node, state->list) {
if (node->string >= renderer->position
&& (!attribute || node->string < attribute->string))
attribute = node;
}
assert(attribute);
node = attribute;
}
#endif
render_dom_node_text(renderer, template, node); render_dom_node_text(renderer, template, node);
if (node->data.attribute.value) { if (node->data.attribute.value) {
@ -668,7 +703,7 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
return node; return node;
} }
static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = { static dom_stack_callback_T dom_source_renderer_push_callbacks[DOM_NODES] = {
/* */ NULL, /* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_source, /* DOM_NODE_ELEMENT */ render_dom_element_source,
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source, /* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
@ -684,6 +719,22 @@ static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = {
/* DOM_NODE_NOTATION */ render_dom_node_source, /* DOM_NODE_NOTATION */ render_dom_node_source,
}; };
/* Pop callbacks of the DOM source renderer. render_dom_document() hands this
 * table to init_sgml_parser() as the pop_callbacks argument, and the DOM
 * stack looks a handler up by the type of the node being popped. Only
 * element nodes have a handler (it renders the element's end tag); a NULL
 * slot means nothing is done when a node of that type is popped.
 * NOTE(review): the initializer is positional, so the slot labels below are
 * only correct if they follow the order of enum dom_node_type, which is
 * declared elsewhere -- confirm before reordering or adding entries. */
static dom_stack_callback_T dom_source_renderer_pop_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_end_source,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
};
/* Shared multiplexor between renderers */ /* Shared multiplexor between renderers */
void void
@ -694,19 +745,10 @@ render_dom_document(struct cache_entry *cached, struct document *document,
struct dom_node *root; struct dom_node *root;
struct dom_renderer renderer; struct dom_renderer renderer;
struct conv_table *convert_table; struct conv_table *convert_table;
dom_stack_callback_T *callbacks = dom_source_renderer_callbacks;
struct sgml_parser *parser; struct sgml_parser *parser;
struct dom_stack stack;
assert(document->options.plain); assert(document->options.plain);
parser = init_sgml_parser(cached, document);
if (!parser) return;
root = parse_sgml(parser, buffer);
done_sgml_parser(parser);
if (!root) return;
convert_table = get_convert_table(head, document->options.cp, convert_table = get_convert_table(head, document->options.cp,
document->options.assume_cp, document->options.assume_cp,
&document->cp, &document->cp,
@ -714,11 +756,19 @@ render_dom_document(struct cache_entry *cached, struct document *document,
document->options.hard_assume); document->options.hard_assume);
init_dom_renderer(&renderer, document, buffer, convert_table); init_dom_renderer(&renderer, document, buffer, convert_table);
init_dom_stack(&stack, &renderer, callbacks, 0);
document->bgcolor = document->options.default_bg; document->bgcolor = document->options.default_bg;
walk_dom_nodes(&stack, root); parser = init_sgml_parser(SGML_PARSER_STREAM, &renderer, cached,
document,
dom_source_renderer_push_callbacks,
dom_source_renderer_pop_callbacks);
if (!parser) return;
root = parse_sgml(parser, buffer);
done_sgml_parser(parser);
if (!root) return;
/* If there are no non-element nodes after the last element node make /* If there are no non-element nodes after the last element node make
* sure that we flush to the end of the cache entry source including * sure that we flush to the end of the cache entry source including
* the '>' of the last element tag if it has one. (bug 519) */ * the '>' of the last element tag if it has one. (bug 519) */
@ -727,5 +777,4 @@ render_dom_document(struct cache_entry *cached, struct document *document,
} }
done_dom_node(root); done_dom_node(root);
done_dom_stack(&stack);
} }

View File

@ -45,19 +45,23 @@ realloc_dom_stack_state_objects(struct dom_stack *stack)
} }
void void
init_dom_stack(struct dom_stack *stack, void *data, init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
dom_stack_callback_T callbacks[DOM_NODES], dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES],
size_t object_size) size_t object_size)
{ {
assert(stack); assert(stack);
memset(stack, 0, sizeof(*stack)); memset(stack, 0, sizeof(*stack));
stack->data = data; stack->parser = parser;
stack->renderer = renderer;
stack->object_size = object_size; stack->object_size = object_size;
if (callbacks) if (push_callbacks)
memcpy(stack->callbacks, callbacks, DOM_STACK_CALLBACKS_SIZE); memcpy(stack->push_callbacks, push_callbacks, DOM_STACK_CALLBACKS_SIZE);
if (pop_callbacks)
memcpy(stack->pop_callbacks, pop_callbacks, DOM_STACK_CALLBACKS_SIZE);
} }
void void
@ -94,7 +98,6 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
if (stack->object_size) { if (stack->object_size) {
unsigned char *state_objects; unsigned char *state_objects;
size_t offset = stack->depth * stack->object_size;
state_objects = realloc_dom_stack_state_objects(stack); state_objects = realloc_dom_stack_state_objects(stack);
if (!state_objects) { if (!state_objects) {
@ -102,7 +105,7 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
return NULL; return NULL;
} }
state->data = (void *) &state_objects[offset]; state->depth = stack->depth;
} }
state->node = node; state->node = node;
@ -111,9 +114,11 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
* in the callbacks */ * in the callbacks */
stack->depth++; stack->depth++;
callback = stack->callbacks[node->type]; callback = stack->push_callbacks[node->type];
if (callback) { if (callback) {
node = callback(stack, node, state->data); void *state_data = get_dom_stack_state_data(stack, state);
node = callback(stack, node, state_data);
/* If the callback returned NULL pop the state immediately */ /* If the callback returned NULL pop the state immediately */
if (!node) { if (!node) {
@ -130,26 +135,26 @@ static int
do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent) do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent)
{ {
struct dom_stack_state *state; struct dom_stack_state *state;
dom_stack_callback_T callback;
assert(stack); assert(stack);
if (!dom_stack_has_parents(stack)) return 0; if (!dom_stack_has_parents(stack)) return 0;
state = get_dom_stack_top(stack); state = get_dom_stack_top(stack);
if (state->callback) { callback = stack->pop_callbacks[state->node->type];
/* Pass the node we are popping to and _not_ the state->node */ if (callback) {
state->callback(stack, parent->node, state->data); void *state_data = get_dom_stack_state_data(stack, state);
callback(stack, state->node, state_data);
} }
stack->depth--; stack->depth--;
assert(stack->depth >= 0); assert(stack->depth >= 0);
if (stack->object_size && state->data) { if (stack->object_size) {
size_t offset = stack->depth * stack->object_size; void *state_data = get_dom_stack_state_data(stack, state);
/* I tried to use item->data here but it caused a memory memset(state_data, 0, stack->object_size);
* corruption bug on fm. This is also less trustworthy in that
* the state->data pointer could have been mangled. --jonas */
memset(&stack->state_objects[offset], 0, stack->object_size);
} }
memset(state, 0, sizeof(*state)); memset(state, 0, sizeof(*state));
@ -170,16 +175,28 @@ void
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type, pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
unsigned char *string, uint16_t length) unsigned char *string, uint16_t length)
{ {
struct dom_stack_state *state, *parent; struct dom_stack_state *state;
unsigned int pos;
if (!dom_stack_has_parents(stack)) return; if (!dom_stack_has_parents(stack)) return;
parent = search_dom_stack(stack, type, string, length); state = search_dom_stack(stack, type, string, length);
if (!parent) return; if (state)
pop_dom_state(stack, type, state);
}
void
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
struct dom_stack_state *target)
{
struct dom_stack_state *state;
unsigned int pos;
if (!target) return;
if (!dom_stack_has_parents(stack)) return;
foreachback_dom_state (stack, state, pos) { foreachback_dom_state (stack, state, pos) {
if (do_pop_dom_node(stack, parent)) if (do_pop_dom_node(stack, target))
break;; break;;
} }
} }

View File

@ -24,14 +24,9 @@ struct dom_stack_state {
/* The index (in the list above) which are currently being handled. */ /* The index (in the list above) which are currently being handled. */
size_t index; size_t index;
/* A callback registered to be called when the node is popped. Used for /* The depth of the state in the stack. This is amongst other things
* correctly highlighting ending elements (e.g. </a>). */ * used to get the state object data. */
dom_stack_callback_T callback; unsigned int depth;
/* Parser specific data. For the SGML parser this holds DTD-oriented
* info about the node (recorded in struct sgml_node_info). E.g.
* whether an element node is optional. */
void *data;
}; };
/* The DOM stack is a convenient way to traverse DOM trees. Also it /* The DOM stack is a convenient way to traverse DOM trees. Also it
@ -42,14 +37,20 @@ struct dom_stack {
struct dom_stack_state *states; struct dom_stack_state *states;
size_t depth; size_t depth;
/* This is one big array of parser specific objects which will be /* This is one big array of parser specific objects. */
* assigned to the data member of the individual dom_stack_states. */ /* The objects hold parser specific data. For the SGML parser this
* holds DTD-oriented info about the node (recorded in struct
* sgml_node_info). E.g. whether an element node is optional. */
unsigned char *state_objects; unsigned char *state_objects;
size_t object_size; size_t object_size;
/* Parser and document specific stuff */ /* Renderer specific callbacks for the streaming parser mode. */
dom_stack_callback_T callbacks[DOM_NODES]; dom_stack_callback_T push_callbacks[DOM_NODES];
void *data; dom_stack_callback_T pop_callbacks[DOM_NODES];
/* Data specific to the parser and renderer. */
void *renderer;
void *parser;
}; };
#define dom_stack_has_parents(nav) \ #define dom_stack_has_parents(nav) \
@ -66,6 +67,9 @@ get_dom_stack_state(struct dom_stack *stack, int top_offset)
#define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1) #define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1)
#define get_dom_stack_top(nav) get_dom_stack_state(nav, 0) #define get_dom_stack_top(nav) get_dom_stack_state(nav, 0)
/* Yield a void pointer to the parser specific object that belongs to @state.
 * The objects are stored back to back in the state_objects array of @stack,
 * object_size bytes apiece, and the depth recorded in @state selects the
 * slot. Both @stack and @state are pointers. */
#define get_dom_stack_state_data(stack, state) \
	((void *) ((stack)->state_objects + (state)->depth * (stack)->object_size))
/* The state iterators do not include the bottom state */ /* The state iterators do not include the bottom state */
#define foreach_dom_state(nav, item, pos) \ #define foreach_dom_state(nav, item, pos) \
@ -84,6 +88,7 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
struct dom_stack_state *state; struct dom_stack_state *state;
int pos; int pos;
/* FIXME: Take node subtype and compare if non-zero or something. */
foreachback_dom_state (stack, state, pos) { foreachback_dom_state (stack, state, pos) {
struct dom_node *parent = state->node; struct dom_node *parent = state->node;
@ -102,8 +107,9 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
/* The @object_size arg tells whether the stack should allocate objects for each /* The @object_size arg tells whether the stack should allocate objects for each
* state to be assigned to the state's @data member. Zero means no state data should * state to be assigned to the state's @data member. Zero means no state data should
* be allocated. */ * be allocated. */
void init_dom_stack(struct dom_stack *stack, void *data, void init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
dom_stack_callback_T callbacks[DOM_NODES], dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES],
size_t object_size); size_t object_size);
void done_dom_stack(struct dom_stack *stack); void done_dom_stack(struct dom_stack *stack);
@ -118,6 +124,11 @@ void pop_dom_node(struct dom_stack *stack);
void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type, void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
unsigned char *string, uint16_t length); unsigned char *string, uint16_t length);
/* Pop all stack states until a specific state is reached. */
void
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
struct dom_stack_state *target);
/* Visit each node in the tree rooted at @root pre-order */ /* Visit each node in the tree rooted at @root pre-order */
void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root); void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root);

View File

@ -37,66 +37,7 @@ static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
}; };
static struct dom_node *
/* Stack callback run while popping: when @node is the node actually leaving
 * the stack, add an element node for the end tag currently held by the
 * scanner as a child of the popped node's parent. Returns the node created
 * by add_dom_element(), or NULL when @node is not the top node or the end
 * tag token is empty. */
static struct dom_node *
add_html_element_end_node(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct sgml_parser *parser = stack->data;
	struct dom_node *parent_node;
	struct scanner_token *end_tag;

	assert(stack && parser && node);
	assert(dom_stack_has_parents(stack));

	/* Ignore calls made for anything but the node being popped. */
	if (get_dom_stack_top(stack)->node != node)
		return NULL;

	parent_node = get_dom_stack_parent(stack)->node;
	end_tag = get_scanner_token(&parser->scanner);

	assertm(end_tag, "No token found in callback");
	assertm(end_tag->type == SGML_TOKEN_ELEMENT_END, "Bad token found in callback");

	/* An empty token carries no element name to record. */
	if (end_tag->length == 0)
		return NULL;

	return add_dom_element(parent_node, end_tag->string, end_tag->length);
}
/* TODO: We need to handle ascending of <br> and "<p>text1<p>text2" using data
* from sgml_node_info. */
static struct dom_node *
add_html_element_node(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct sgml_parser *parser = stack->data;
assert(stack && node);
assert(dom_stack_has_parents(stack));
/* TODO: Move to SGML parser main loop and disguise these element ends
* in some internal processing instruction. */
if (parser->flags & SGML_PARSER_ADD_ELEMENT_ENDS)
get_dom_stack_top(stack)->callback = add_html_element_end_node;
return node;
}
struct sgml_info sgml_html_info = { struct sgml_info sgml_html_info = {
html_attributes, html_attributes,
html_elements, html_elements,
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ add_html_element_node,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
}; };

View File

@ -41,23 +41,27 @@ add_sgml_document(struct dom_stack *stack, struct uri *uri)
static inline struct dom_node * static inline struct dom_node *
add_sgml_element(struct dom_stack *stack, struct scanner_token *token) add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
{ {
struct sgml_parser *parser = stack->data; struct sgml_parser *parser = stack->parser;
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
struct dom_stack_state *state; struct dom_stack_state *state;
struct sgml_parser_state *pstate; struct sgml_parser_state *pstate;
struct dom_node *node; struct dom_node *node;
struct sgml_node_info *node_info;
node = add_dom_element(parent, token->string, token->length); node = add_dom_element(parent, token->string, token->length);
if (!node) return NULL;
if (!node || !push_dom_node(stack, node)) node_info = get_sgml_node_info(parser->info->elements, node);
node->data.element.type = node_info->type;
if (!push_dom_node(stack, node))
return NULL; return NULL;
state = get_dom_stack_top(stack); state = get_dom_stack_top(stack);
assert(node == state->node && state->data); assert(node == state->node);
pstate = state->data; pstate = get_dom_stack_state_data(stack, state);
pstate->info = get_sgml_node_info(parser->info->elements, node); pstate->info = node_info;
node->data.element.type = pstate->info->type;
return node; return node;
} }
@ -67,7 +71,7 @@ static inline void
add_sgml_attribute(struct dom_stack *stack, add_sgml_attribute(struct dom_stack *stack,
struct scanner_token *token, struct scanner_token *valtoken) struct scanner_token *token, struct scanner_token *valtoken)
{ {
struct sgml_parser *parser = stack->data; struct sgml_parser *parser = stack->parser;
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
unsigned char *value = valtoken ? valtoken->string : NULL; unsigned char *value = valtoken ? valtoken->string : NULL;
uint16_t valuelen = valtoken ? valtoken->length : 0; uint16_t valuelen = valtoken ? valtoken->length : 0;
@ -77,9 +81,6 @@ add_sgml_attribute(struct dom_stack *stack,
node = add_dom_attribute(parent, token->string, token->length, node = add_dom_attribute(parent, token->string, token->length,
value, valuelen); value, valuelen);
if (!node || !push_dom_node(stack, node))
return;
info = get_sgml_node_info(parser->info->attributes, node); info = get_sgml_node_info(parser->info->attributes, node);
node->data.attribute.type = info->type; node->data.attribute.type = info->type;
@ -89,6 +90,9 @@ add_sgml_attribute(struct dom_stack *stack,
if (valtoken && valtoken->type == SGML_TOKEN_STRING) if (valtoken && valtoken->type == SGML_TOKEN_STRING)
node->data.attribute.quoted = 1; node->data.attribute.quoted = 1;
if (!node || !push_dom_node(stack, node))
return;
pop_dom_node(stack); pop_dom_node(stack);
} }
@ -243,8 +247,18 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
if (!token->length) { if (!token->length) {
pop_dom_node(stack); pop_dom_node(stack);
} else { } else {
pop_dom_nodes(stack, DOM_NODE_ELEMENT, struct dom_stack_state *state;
token->string, token->length);
state = search_dom_stack(stack, DOM_NODE_ELEMENT,
token->string, token->length);
if (state) {
struct sgml_parser_state *pstate;
pstate = get_dom_stack_state_data(stack, state);
copy_struct(&pstate->end_token, token);
pop_dom_state(stack, DOM_NODE_ELEMENT, state);
}
} }
skip_scanner_token(scanner); skip_scanner_token(scanner);
break; break;
@ -293,7 +307,10 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
struct sgml_parser * struct sgml_parser *
init_sgml_parser(struct cache_entry *cached, struct document *document) init_sgml_parser(enum sgml_parser_type type, void *renderer,
struct cache_entry *cached, struct document *document,
dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES])
{ {
size_t obj_size = sizeof(struct sgml_parser_state); size_t obj_size = sizeof(struct sgml_parser_state);
struct sgml_parser *parser; struct sgml_parser *parser;
@ -301,14 +318,13 @@ init_sgml_parser(struct cache_entry *cached, struct document *document)
parser = mem_calloc(1, sizeof(*parser)); parser = mem_calloc(1, sizeof(*parser));
if (!parser) return NULL; if (!parser) return NULL;
parser->type = type;
parser->document = document; parser->document = document;
parser->cache_entry = cached; parser->cache_entry = cached;
parser->info = &sgml_html_info; parser->info = &sgml_html_info;
init_dom_stack(&parser->stack, parser, parser->info->callbacks, obj_size); init_dom_stack(&parser->stack, parser, renderer,
push_callbacks, pop_callbacks, obj_size);
if (document->options.plain)
parser->flags |= SGML_PARSER_ADD_ELEMENT_ENDS;
return parser; return parser;
} }

View File

@ -11,13 +11,21 @@ struct cache_entry;
struct document; struct document;
struct string; struct string;
enum sgml_parser_flags { enum sgml_parser_type {
SGML_PARSER_ADD_ELEMENT_ENDS = 1, /* The first one is a DOM tree builder. */
SGML_PARSER_TREE,
/* The second one will simply push nodes on the stack, not building a
* DOM tree. This interface is similar to that of SAX (Simple API for
* XML) where events are fired when nodes are entered and exited. It is
* useful when you are not actually interested in the DOM tree, but can
* do all processing in a stream-like manner, such as when highlighting
* HTML code. */
SGML_PARSER_STREAM,
}; };
struct sgml_parser { struct sgml_parser {
/* The parser flags controls what gets added to the DOM tree */ enum sgml_parser_type type;
enum sgml_parser_flags flags;
struct sgml_info *info; struct sgml_info *info;
struct document *document; struct document *document;
@ -30,10 +38,16 @@ struct sgml_parser {
struct sgml_parser_state { struct sgml_parser_state {
struct sgml_node_info *info; struct sgml_node_info *info;
/* This is used by the DOM source renderer for highlighting the
* end-tag of an element. */
struct scanner_token end_token;
}; };
struct sgml_parser * struct sgml_parser *
init_sgml_parser(struct cache_entry *cached, struct document *document); init_sgml_parser(enum sgml_parser_type type, void *renderer,
struct cache_entry *cached, struct document *document,
dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES]);
void done_sgml_parser(struct sgml_parser *parser); void done_sgml_parser(struct sgml_parser *parser);

View File

@ -76,7 +76,6 @@ get_sgml_node_info(struct sgml_node_info list[], struct dom_node *node)
struct sgml_info { struct sgml_info {
struct sgml_node_info *attributes; struct sgml_node_info *attributes;
struct sgml_node_info *elements; struct sgml_node_info *elements;
dom_stack_callback_T callbacks[DOM_NODES];
}; };
#endif #endif