mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
Merge with dom-parser-stream
This commit is contained in:
commit
1d24d549e4
@ -28,6 +28,7 @@
|
||||
#include "util/box.h"
|
||||
#include "util/error.h"
|
||||
#include "util/memory.h"
|
||||
#include "util/scanner.h"
|
||||
#include "util/snprintf.h"
|
||||
#include "util/string.h"
|
||||
|
||||
@ -387,7 +388,7 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
|
||||
static struct dom_node *
|
||||
render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
unsigned char *name, *value;
|
||||
|
||||
@ -407,7 +408,7 @@ render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
static struct dom_node *
|
||||
render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct document *document = renderer->document;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
unsigned char *name, *value, *id;
|
||||
@ -430,7 +431,7 @@ render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *da
|
||||
static struct dom_node *
|
||||
render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct document *document = renderer->document;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
unsigned char *name, *value;
|
||||
@ -452,7 +453,7 @@ render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
static struct dom_node *
|
||||
render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct document *document = renderer->document;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
unsigned char *name, *id;
|
||||
@ -536,7 +537,7 @@ render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template
|
||||
static struct dom_node *
|
||||
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
@ -550,7 +551,7 @@ render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *dat
|
||||
static struct dom_node *
|
||||
render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
unsigned char *value;
|
||||
int valuelen;
|
||||
|
||||
@ -577,7 +578,7 @@ render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, voi
|
||||
static struct dom_node *
|
||||
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
@ -587,27 +588,61 @@ render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *
|
||||
}
|
||||
|
||||
static struct dom_node *
|
||||
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_stack_state *state = get_dom_stack_parent(stack);
|
||||
struct dom_renderer *renderer = stack->data;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
struct dom_node *attribute = NULL;
|
||||
int i;
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct sgml_parser_state *pstate = data;
|
||||
struct scanner_token *token = &pstate->end_token;
|
||||
unsigned char *string = token->string;
|
||||
int length = token->length;
|
||||
|
||||
assert(node && renderer->document);
|
||||
assert(state && state->list);
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
/* The attributes are sorted but we want them in the original order */
|
||||
foreach_dom_node(i, node, state->list) {
|
||||
if (node->string >= renderer->position
|
||||
&& (!attribute || node->string < attribute->string))
|
||||
attribute = node;
|
||||
if (!string || !length)
|
||||
return node;
|
||||
|
||||
if (check_dom_node_source(renderer, string, length)) {
|
||||
render_dom_flush(renderer, string);
|
||||
renderer->position = string + length;
|
||||
assert_source(renderer, renderer->position, 0);
|
||||
}
|
||||
|
||||
assert(attribute);
|
||||
node = attribute;
|
||||
render_dom_text(renderer, &renderer->styles[node->type], string, length);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static struct dom_node *
|
||||
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->renderer;
|
||||
struct screen_char *template = &renderer->styles[node->type];
|
||||
|
||||
assert(node && renderer->document);
|
||||
|
||||
#if 0
|
||||
/* Disabled since the DOM source highlighter uses the stream parser and
|
||||
* therefore the attributes are pushed to it in order. However, if/when
|
||||
* we will support rendering (read saving) of loaded DOM trees this one
|
||||
* small hack is needed to get the attributes in the original order. */
|
||||
{
|
||||
struct dom_stack_state *state = get_dom_stack_parent(stack);
|
||||
struct dom_node *attribute = NULL;
|
||||
int i;
|
||||
|
||||
assert(state && state->list);
|
||||
|
||||
/* The attributes are sorted but we want them in the original order */
|
||||
foreach_dom_node(i, node, state->list) {
|
||||
if (node->string >= renderer->position
|
||||
&& (!attribute || node->string < attribute->string))
|
||||
attribute = node;
|
||||
}
|
||||
|
||||
assert(attribute);
|
||||
node = attribute;
|
||||
}
|
||||
#endif
|
||||
render_dom_node_text(renderer, template, node);
|
||||
|
||||
if (node->data.attribute.value) {
|
||||
@ -668,7 +703,7 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
|
||||
return node;
|
||||
}
|
||||
|
||||
static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = {
|
||||
static dom_stack_callback_T dom_source_renderer_push_callbacks[DOM_NODES] = {
|
||||
/* */ NULL,
|
||||
/* DOM_NODE_ELEMENT */ render_dom_element_source,
|
||||
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
|
||||
@ -684,6 +719,22 @@ static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = {
|
||||
/* DOM_NODE_NOTATION */ render_dom_node_source,
|
||||
};
|
||||
|
||||
/* Pop callbacks for the DOM source renderer, indexed by DOM node type. Only
 * element nodes have one; it renders the end tag recorded for the element. */
static dom_stack_callback_T dom_source_renderer_pop_callbacks[DOM_NODES] = {
|
||||
/* */ NULL,
|
||||
/* DOM_NODE_ELEMENT */ render_dom_element_end_source,
|
||||
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||
/* DOM_NODE_TEXT */ NULL,
|
||||
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||
/* DOM_NODE_ENTITY */ NULL,
|
||||
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||
/* DOM_NODE_COMMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||
/* DOM_NODE_NOTATION */ NULL,
|
||||
};
|
||||
|
||||
|
||||
/* Shared multiplexor between renderers */
|
||||
void
|
||||
@ -694,19 +745,10 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
struct dom_node *root;
|
||||
struct dom_renderer renderer;
|
||||
struct conv_table *convert_table;
|
||||
dom_stack_callback_T *callbacks = dom_source_renderer_callbacks;
|
||||
struct sgml_parser *parser;
|
||||
struct dom_stack stack;
|
||||
|
||||
assert(document->options.plain);
|
||||
|
||||
parser = init_sgml_parser(cached, document);
|
||||
if (!parser) return;
|
||||
|
||||
root = parse_sgml(parser, buffer);
|
||||
done_sgml_parser(parser);
|
||||
if (!root) return;
|
||||
|
||||
convert_table = get_convert_table(head, document->options.cp,
|
||||
document->options.assume_cp,
|
||||
&document->cp,
|
||||
@ -714,11 +756,19 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
document->options.hard_assume);
|
||||
|
||||
init_dom_renderer(&renderer, document, buffer, convert_table);
|
||||
init_dom_stack(&stack, &renderer, callbacks, 0);
|
||||
|
||||
document->bgcolor = document->options.default_bg;
|
||||
|
||||
walk_dom_nodes(&stack, root);
|
||||
parser = init_sgml_parser(SGML_PARSER_STREAM, &renderer, cached,
|
||||
document,
|
||||
dom_source_renderer_push_callbacks,
|
||||
dom_source_renderer_pop_callbacks);
|
||||
if (!parser) return;
|
||||
|
||||
root = parse_sgml(parser, buffer);
|
||||
done_sgml_parser(parser);
|
||||
if (!root) return;
|
||||
|
||||
/* If there are no non-element nodes after the last element node make
|
||||
* sure that we flush to the end of the cache entry source including
|
||||
* the '>' of the last element tag if it has one. (bug 519) */
|
||||
@ -727,5 +777,4 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
}
|
||||
|
||||
done_dom_node(root);
|
||||
done_dom_stack(&stack);
|
||||
}
|
||||
|
@ -45,19 +45,23 @@ realloc_dom_stack_state_objects(struct dom_stack *stack)
|
||||
}
|
||||
|
||||
void
|
||||
init_dom_stack(struct dom_stack *stack, void *data,
|
||||
dom_stack_callback_T callbacks[DOM_NODES],
|
||||
init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
|
||||
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||
dom_stack_callback_T pop_callbacks[DOM_NODES],
|
||||
size_t object_size)
|
||||
{
|
||||
assert(stack);
|
||||
|
||||
memset(stack, 0, sizeof(*stack));
|
||||
|
||||
stack->data = data;
|
||||
stack->parser = parser;
|
||||
stack->renderer = renderer;
|
||||
stack->object_size = object_size;
|
||||
|
||||
if (callbacks)
|
||||
memcpy(stack->callbacks, callbacks, DOM_STACK_CALLBACKS_SIZE);
|
||||
if (push_callbacks)
|
||||
memcpy(stack->push_callbacks, push_callbacks, DOM_STACK_CALLBACKS_SIZE);
|
||||
if (pop_callbacks)
|
||||
memcpy(stack->pop_callbacks, pop_callbacks, DOM_STACK_CALLBACKS_SIZE);
|
||||
}
|
||||
|
||||
void
|
||||
@ -94,7 +98,6 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
||||
|
||||
if (stack->object_size) {
|
||||
unsigned char *state_objects;
|
||||
size_t offset = stack->depth * stack->object_size;
|
||||
|
||||
state_objects = realloc_dom_stack_state_objects(stack);
|
||||
if (!state_objects) {
|
||||
@ -102,7 +105,7 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
state->data = (void *) &state_objects[offset];
|
||||
state->depth = stack->depth;
|
||||
}
|
||||
|
||||
state->node = node;
|
||||
@ -111,9 +114,11 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
||||
* in the callbacks */
|
||||
stack->depth++;
|
||||
|
||||
callback = stack->callbacks[node->type];
|
||||
callback = stack->push_callbacks[node->type];
|
||||
if (callback) {
|
||||
node = callback(stack, node, state->data);
|
||||
void *state_data = get_dom_stack_state_data(stack, state);
|
||||
|
||||
node = callback(stack, node, state_data);
|
||||
|
||||
/* If the callback returned NULL pop the state immediately */
|
||||
if (!node) {
|
||||
@ -130,26 +135,26 @@ static int
|
||||
do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent)
|
||||
{
|
||||
struct dom_stack_state *state;
|
||||
dom_stack_callback_T callback;
|
||||
|
||||
assert(stack);
|
||||
if (!dom_stack_has_parents(stack)) return 0;
|
||||
|
||||
state = get_dom_stack_top(stack);
|
||||
if (state->callback) {
|
||||
/* Pass the node we are popping to and _not_ the state->node */
|
||||
state->callback(stack, parent->node, state->data);
|
||||
callback = stack->pop_callbacks[state->node->type];
|
||||
if (callback) {
|
||||
void *state_data = get_dom_stack_state_data(stack, state);
|
||||
|
||||
callback(stack, state->node, state_data);
|
||||
}
|
||||
|
||||
stack->depth--;
|
||||
assert(stack->depth >= 0);
|
||||
|
||||
if (stack->object_size && state->data) {
|
||||
size_t offset = stack->depth * stack->object_size;
|
||||
if (stack->object_size) {
|
||||
void *state_data = get_dom_stack_state_data(stack, state);
|
||||
|
||||
/* I tried to use item->data here but it caused a memory
|
||||
* corruption bug on fm. This is also less trustworthy in that
|
||||
* the state->data pointer could have been mangled. --jonas */
|
||||
memset(&stack->state_objects[offset], 0, stack->object_size);
|
||||
memset(state_data, 0, stack->object_size);
|
||||
}
|
||||
|
||||
memset(state, 0, sizeof(*state));
|
||||
@ -170,16 +175,28 @@ void
|
||||
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
||||
unsigned char *string, uint16_t length)
|
||||
{
|
||||
struct dom_stack_state *state, *parent;
|
||||
unsigned int pos;
|
||||
struct dom_stack_state *state;
|
||||
|
||||
if (!dom_stack_has_parents(stack)) return;
|
||||
|
||||
parent = search_dom_stack(stack, type, string, length);
|
||||
if (!parent) return;
|
||||
state = search_dom_stack(stack, type, string, length);
|
||||
if (state)
|
||||
pop_dom_state(stack, type, state);
|
||||
}
|
||||
|
||||
void
|
||||
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
|
||||
struct dom_stack_state *target)
|
||||
{
|
||||
struct dom_stack_state *state;
|
||||
unsigned int pos;
|
||||
|
||||
if (!target) return;
|
||||
|
||||
if (!dom_stack_has_parents(stack)) return;
|
||||
|
||||
foreachback_dom_state (stack, state, pos) {
|
||||
if (do_pop_dom_node(stack, parent))
|
||||
if (do_pop_dom_node(stack, target))
|
||||
break;;
|
||||
}
|
||||
}
|
||||
|
@ -24,14 +24,9 @@ struct dom_stack_state {
|
||||
/* The index (in the list above) that is currently being handled. */
|
||||
size_t index;
|
||||
|
||||
/* A callback registered to be called when the node is popped. Used for
|
||||
* correctly highlighting ending elements (e.g. </a>). */
|
||||
dom_stack_callback_T callback;
|
||||
|
||||
/* Parser specific data. For the SGML parser this holds DTD-oriented
|
||||
* info about the node (recorded in struct sgml_node_info). E.g.
|
||||
* whether an element node is optional. */
|
||||
void *data;
|
||||
/* The depth of the state in the stack. This is amongst other things
|
||||
* used to get the state object data. */
|
||||
unsigned int depth;
|
||||
};
|
||||
|
||||
/* The DOM stack is a convenient way to traverse DOM trees. Also it
|
||||
@ -42,14 +37,20 @@ struct dom_stack {
|
||||
struct dom_stack_state *states;
|
||||
size_t depth;
|
||||
|
||||
/* This is one big array of parser specific objects which will be
|
||||
* assigned to the data member of the individual dom_stack_states. */
|
||||
/* This is one big array of parser specific objects. */
|
||||
/* The objects hold parser specific data. For the SGML parser this
|
||||
* holds DTD-oriented info about the node (recorded in struct
|
||||
* sgml_node_info). E.g. whether an element node is optional. */
|
||||
unsigned char *state_objects;
|
||||
size_t object_size;
|
||||
|
||||
/* Parser and document specific stuff */
|
||||
dom_stack_callback_T callbacks[DOM_NODES];
|
||||
void *data;
|
||||
/* Renderer specific callbacks for the streaming parser mode. */
|
||||
dom_stack_callback_T push_callbacks[DOM_NODES];
|
||||
dom_stack_callback_T pop_callbacks[DOM_NODES];
|
||||
|
||||
/* Data specific to the parser and renderer. */
|
||||
void *renderer;
|
||||
void *parser;
|
||||
};
|
||||
|
||||
#define dom_stack_has_parents(nav) \
|
||||
@ -66,6 +67,9 @@ get_dom_stack_state(struct dom_stack *stack, int top_offset)
|
||||
#define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1)
|
||||
#define get_dom_stack_top(nav) get_dom_stack_state(nav, 0)
|
||||
|
||||
/* Yields a pointer to the object in the stack's state_objects array that
 * belongs to @state. The object is located by multiplying the state's depth
 * by the stack's object_size. */
#define get_dom_stack_state_data(stack, state) \
|
||||
((void *) &(stack)->state_objects[(state)->depth * (stack)->object_size])
|
||||
|
||||
/* The state iterators do not include the bottom state */
|
||||
|
||||
#define foreach_dom_state(nav, item, pos) \
|
||||
@ -84,6 +88,7 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
|
||||
struct dom_stack_state *state;
|
||||
int pos;
|
||||
|
||||
/* FIXME: Take node subtype and compare if non-zero or something. */
|
||||
foreachback_dom_state (stack, state, pos) {
|
||||
struct dom_node *parent = state->node;
|
||||
|
||||
@ -102,8 +107,9 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
|
||||
/* The @object_size arg tells whether the stack should allocate an object for
|
||||
* each state, retrievable with get_dom_stack_state_data(). Zero means no state
|
||||
* data should be allocated. */
|
||||
void init_dom_stack(struct dom_stack *stack, void *data,
|
||||
dom_stack_callback_T callbacks[DOM_NODES],
|
||||
void init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
|
||||
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||
dom_stack_callback_T pop_callbacks[DOM_NODES],
|
||||
size_t object_size);
|
||||
void done_dom_stack(struct dom_stack *stack);
|
||||
|
||||
@ -118,6 +124,11 @@ void pop_dom_node(struct dom_stack *stack);
|
||||
void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
||||
unsigned char *string, uint16_t length);
|
||||
|
||||
/* Pop all stack states until a specific state is reached. */
|
||||
void
|
||||
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
|
||||
struct dom_stack_state *target);
|
||||
|
||||
/* Visit each node in the tree rooted at @root pre-order */
|
||||
void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root);
|
||||
|
||||
|
@ -37,66 +37,7 @@ static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
|
||||
};
|
||||
|
||||
|
||||
static struct dom_node *
|
||||
add_html_element_end_node(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct sgml_parser *parser = stack->data;
|
||||
struct dom_node *parent;
|
||||
struct scanner_token *token;
|
||||
|
||||
assert(stack && parser && node);
|
||||
assert(dom_stack_has_parents(stack));
|
||||
|
||||
/* Are we the actual node being popped? */
|
||||
if (node != get_dom_stack_top(stack)->node)
|
||||
return NULL;
|
||||
|
||||
parent = get_dom_stack_parent(stack)->node;
|
||||
token = get_scanner_token(&parser->scanner);
|
||||
|
||||
assertm(token, "No token found in callback");
|
||||
assertm(token->type == SGML_TOKEN_ELEMENT_END, "Bad token found in callback");
|
||||
|
||||
if (!token->length) return NULL;
|
||||
|
||||
return add_dom_element(parent, token->string, token->length);
|
||||
}
|
||||
|
||||
/* TODO: We need to handle ascending of <br> and "<p>text1<p>text2" using data
|
||||
* from sgml_node_info. */
|
||||
static struct dom_node *
|
||||
add_html_element_node(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct sgml_parser *parser = stack->data;
|
||||
|
||||
assert(stack && node);
|
||||
assert(dom_stack_has_parents(stack));
|
||||
|
||||
/* TODO: Move to SGML parser main loop and disguise these element ends
|
||||
* in some internal processing instruction. */
|
||||
if (parser->flags & SGML_PARSER_ADD_ELEMENT_ENDS)
|
||||
get_dom_stack_top(stack)->callback = add_html_element_end_node;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
struct sgml_info sgml_html_info = {
|
||||
html_attributes,
|
||||
html_elements,
|
||||
{
|
||||
/* */ NULL,
|
||||
/* DOM_NODE_ELEMENT */ add_html_element_node,
|
||||
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||
/* DOM_NODE_TEXT */ NULL,
|
||||
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||
/* DOM_NODE_ENTITY */ NULL,
|
||||
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||
/* DOM_NODE_COMMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||
/* DOM_NODE_NOTATION */ NULL,
|
||||
}
|
||||
};
|
||||
|
@ -41,23 +41,27 @@ add_sgml_document(struct dom_stack *stack, struct uri *uri)
|
||||
static inline struct dom_node *
|
||||
add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
|
||||
{
|
||||
struct sgml_parser *parser = stack->data;
|
||||
struct sgml_parser *parser = stack->parser;
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
struct dom_stack_state *state;
|
||||
struct sgml_parser_state *pstate;
|
||||
struct dom_node *node;
|
||||
struct sgml_node_info *node_info;
|
||||
|
||||
node = add_dom_element(parent, token->string, token->length);
|
||||
if (!node) return NULL;
|
||||
|
||||
if (!node || !push_dom_node(stack, node))
|
||||
node_info = get_sgml_node_info(parser->info->elements, node);
|
||||
node->data.element.type = node_info->type;
|
||||
|
||||
if (!push_dom_node(stack, node))
|
||||
return NULL;
|
||||
|
||||
state = get_dom_stack_top(stack);
|
||||
assert(node == state->node && state->data);
|
||||
assert(node == state->node);
|
||||
|
||||
pstate = state->data;
|
||||
pstate->info = get_sgml_node_info(parser->info->elements, node);
|
||||
node->data.element.type = pstate->info->type;
|
||||
pstate = get_dom_stack_state_data(stack, state);
|
||||
pstate->info = node_info;
|
||||
|
||||
return node;
|
||||
}
|
||||
@ -67,7 +71,7 @@ static inline void
|
||||
add_sgml_attribute(struct dom_stack *stack,
|
||||
struct scanner_token *token, struct scanner_token *valtoken)
|
||||
{
|
||||
struct sgml_parser *parser = stack->data;
|
||||
struct sgml_parser *parser = stack->parser;
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
unsigned char *value = valtoken ? valtoken->string : NULL;
|
||||
uint16_t valuelen = valtoken ? valtoken->length : 0;
|
||||
@ -77,9 +81,6 @@ add_sgml_attribute(struct dom_stack *stack,
|
||||
node = add_dom_attribute(parent, token->string, token->length,
|
||||
value, valuelen);
|
||||
|
||||
if (!node || !push_dom_node(stack, node))
|
||||
return;
|
||||
|
||||
info = get_sgml_node_info(parser->info->attributes, node);
|
||||
|
||||
node->data.attribute.type = info->type;
|
||||
@ -89,6 +90,9 @@ add_sgml_attribute(struct dom_stack *stack,
|
||||
if (valtoken && valtoken->type == SGML_TOKEN_STRING)
|
||||
node->data.attribute.quoted = 1;
|
||||
|
||||
if (!node || !push_dom_node(stack, node))
|
||||
return;
|
||||
|
||||
pop_dom_node(stack);
|
||||
}
|
||||
|
||||
@ -243,8 +247,18 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
|
||||
if (!token->length) {
|
||||
pop_dom_node(stack);
|
||||
} else {
|
||||
pop_dom_nodes(stack, DOM_NODE_ELEMENT,
|
||||
token->string, token->length);
|
||||
struct dom_stack_state *state;
|
||||
|
||||
state = search_dom_stack(stack, DOM_NODE_ELEMENT,
|
||||
token->string, token->length);
|
||||
if (state) {
|
||||
struct sgml_parser_state *pstate;
|
||||
|
||||
pstate = get_dom_stack_state_data(stack, state);
|
||||
copy_struct(&pstate->end_token, token);
|
||||
|
||||
pop_dom_state(stack, DOM_NODE_ELEMENT, state);
|
||||
}
|
||||
}
|
||||
skip_scanner_token(scanner);
|
||||
break;
|
||||
@ -293,7 +307,10 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
|
||||
|
||||
|
||||
struct sgml_parser *
|
||||
init_sgml_parser(struct cache_entry *cached, struct document *document)
|
||||
init_sgml_parser(enum sgml_parser_type type, void *renderer,
|
||||
struct cache_entry *cached, struct document *document,
|
||||
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||
dom_stack_callback_T pop_callbacks[DOM_NODES])
|
||||
{
|
||||
size_t obj_size = sizeof(struct sgml_parser_state);
|
||||
struct sgml_parser *parser;
|
||||
@ -301,14 +318,13 @@ init_sgml_parser(struct cache_entry *cached, struct document *document)
|
||||
parser = mem_calloc(1, sizeof(*parser));
|
||||
if (!parser) return NULL;
|
||||
|
||||
parser->type = type;
|
||||
parser->document = document;
|
||||
parser->cache_entry = cached;
|
||||
parser->info = &sgml_html_info;
|
||||
|
||||
init_dom_stack(&parser->stack, parser, parser->info->callbacks, obj_size);
|
||||
|
||||
if (document->options.plain)
|
||||
parser->flags |= SGML_PARSER_ADD_ELEMENT_ENDS;
|
||||
init_dom_stack(&parser->stack, parser, renderer,
|
||||
push_callbacks, pop_callbacks, obj_size);
|
||||
|
||||
return parser;
|
||||
}
|
||||
|
@ -11,13 +11,21 @@ struct cache_entry;
|
||||
struct document;
|
||||
struct string;
|
||||
|
||||
enum sgml_parser_flags {
|
||||
SGML_PARSER_ADD_ELEMENT_ENDS = 1,
|
||||
enum sgml_parser_type {
|
||||
/* The first one is a DOM tree builder. */
|
||||
SGML_PARSER_TREE,
|
||||
/* The second one will simply push nodes on the stack, not building a
|
||||
* DOM tree. This interface is similar to that of SAX (Simple API for
|
||||
* XML) where events are fired when nodes are entered and exited. It is
|
||||
* useful when you are not actually interested in the DOM tree, but can
|
||||
* do all processing in a stream-like manner, such as when highlighting
|
||||
* HTML code. */
|
||||
SGML_PARSER_STREAM,
|
||||
};
|
||||
|
||||
struct sgml_parser {
|
||||
/* The parser flags controls what gets added to the DOM tree */
|
||||
enum sgml_parser_flags flags;
|
||||
enum sgml_parser_type type;
|
||||
|
||||
struct sgml_info *info;
|
||||
|
||||
struct document *document;
|
||||
@ -30,10 +38,16 @@ struct sgml_parser {
|
||||
|
||||
struct sgml_parser_state {
|
||||
struct sgml_node_info *info;
|
||||
/* This is used by the DOM source renderer for highlighting the
|
||||
* end-tag of an element. */
|
||||
struct scanner_token end_token;
|
||||
};
|
||||
|
||||
struct sgml_parser *
|
||||
init_sgml_parser(struct cache_entry *cached, struct document *document);
|
||||
init_sgml_parser(enum sgml_parser_type type, void *renderer,
|
||||
struct cache_entry *cached, struct document *document,
|
||||
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||
dom_stack_callback_T pop_callbacks[DOM_NODES]);
|
||||
|
||||
void done_sgml_parser(struct sgml_parser *parser);
|
||||
|
||||
|
@ -76,7 +76,6 @@ get_sgml_node_info(struct sgml_node_info list[], struct dom_node *node)
|
||||
struct sgml_info {
|
||||
struct sgml_node_info *attributes;
|
||||
struct sgml_node_info *elements;
|
||||
dom_stack_callback_T callbacks[DOM_NODES];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user