mirror of
https://github.com/rkd77/elinks.git
synced 2025-06-30 22:19:29 -04:00
Merge with dom-parser-stream
This commit is contained in:
commit
1d24d549e4
@ -28,6 +28,7 @@
|
|||||||
#include "util/box.h"
|
#include "util/box.h"
|
||||||
#include "util/error.h"
|
#include "util/error.h"
|
||||||
#include "util/memory.h"
|
#include "util/memory.h"
|
||||||
|
#include "util/scanner.h"
|
||||||
#include "util/snprintf.h"
|
#include "util/snprintf.h"
|
||||||
#include "util/string.h"
|
#include "util/string.h"
|
||||||
|
|
||||||
@ -387,7 +388,7 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
struct screen_char *template = &renderer->styles[node->type];
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
unsigned char *name, *value;
|
unsigned char *name, *value;
|
||||||
|
|
||||||
@ -407,7 +408,7 @@ render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
struct document *document = renderer->document;
|
struct document *document = renderer->document;
|
||||||
struct screen_char *template = &renderer->styles[node->type];
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
unsigned char *name, *value, *id;
|
unsigned char *name, *value, *id;
|
||||||
@ -430,7 +431,7 @@ render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *da
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
struct document *document = renderer->document;
|
struct document *document = renderer->document;
|
||||||
struct screen_char *template = &renderer->styles[node->type];
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
unsigned char *name, *value;
|
unsigned char *name, *value;
|
||||||
@ -452,7 +453,7 @@ render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
struct document *document = renderer->document;
|
struct document *document = renderer->document;
|
||||||
struct screen_char *template = &renderer->styles[node->type];
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
unsigned char *name, *id;
|
unsigned char *name, *id;
|
||||||
@ -536,7 +537,7 @@ render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
|
|
||||||
assert(node && renderer && renderer->document);
|
assert(node && renderer && renderer->document);
|
||||||
|
|
||||||
@ -550,7 +551,7 @@ render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *dat
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
unsigned char *value;
|
unsigned char *value;
|
||||||
int valuelen;
|
int valuelen;
|
||||||
|
|
||||||
@ -577,7 +578,7 @@ render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, voi
|
|||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
|
|
||||||
assert(node && renderer && renderer->document);
|
assert(node && renderer && renderer->document);
|
||||||
|
|
||||||
@ -587,27 +588,61 @@ render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct dom_node *
|
static struct dom_node *
|
||||||
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
struct dom_stack_state *state = get_dom_stack_parent(stack);
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
struct dom_renderer *renderer = stack->data;
|
struct sgml_parser_state *pstate = data;
|
||||||
struct screen_char *template = &renderer->styles[node->type];
|
struct scanner_token *token = &pstate->end_token;
|
||||||
struct dom_node *attribute = NULL;
|
unsigned char *string = token->string;
|
||||||
int i;
|
int length = token->length;
|
||||||
|
|
||||||
assert(node && renderer->document);
|
assert(node && renderer && renderer->document);
|
||||||
assert(state && state->list);
|
|
||||||
|
|
||||||
/* The attributes are sorted but we want them in the original order */
|
if (!string || !length)
|
||||||
foreach_dom_node(i, node, state->list) {
|
return node;
|
||||||
if (node->string >= renderer->position
|
|
||||||
&& (!attribute || node->string < attribute->string))
|
if (check_dom_node_source(renderer, string, length)) {
|
||||||
attribute = node;
|
render_dom_flush(renderer, string);
|
||||||
|
renderer->position = string + length;
|
||||||
|
assert_source(renderer, renderer->position, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(attribute);
|
render_dom_text(renderer, &renderer->styles[node->type], string, length);
|
||||||
node = attribute;
|
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct dom_node *
|
||||||
|
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
|
{
|
||||||
|
struct dom_renderer *renderer = stack->renderer;
|
||||||
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
|
|
||||||
|
assert(node && renderer->document);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Disabled since the DOM source highlighter uses the stream parser and
|
||||||
|
* therefore the attributes are pushed to it in order. However, if/when
|
||||||
|
* we support rendering (read: saving) of loaded DOM trees, this one
|
||||||
|
* small hack is needed to get the attributes in the original order. */
|
||||||
|
{
|
||||||
|
struct dom_stack_state *state = get_dom_stack_parent(stack);
|
||||||
|
struct dom_node *attribute = NULL;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
assert(state && state->list);
|
||||||
|
|
||||||
|
/* The attributes are sorted but we want them in the original order */
|
||||||
|
foreach_dom_node(i, node, state->list) {
|
||||||
|
if (node->string >= renderer->position
|
||||||
|
&& (!attribute || node->string < attribute->string))
|
||||||
|
attribute = node;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(attribute);
|
||||||
|
node = attribute;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
render_dom_node_text(renderer, template, node);
|
render_dom_node_text(renderer, template, node);
|
||||||
|
|
||||||
if (node->data.attribute.value) {
|
if (node->data.attribute.value) {
|
||||||
@ -668,7 +703,7 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = {
|
static dom_stack_callback_T dom_source_renderer_push_callbacks[DOM_NODES] = {
|
||||||
/* */ NULL,
|
/* */ NULL,
|
||||||
/* DOM_NODE_ELEMENT */ render_dom_element_source,
|
/* DOM_NODE_ELEMENT */ render_dom_element_source,
|
||||||
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
|
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
|
||||||
@ -684,6 +719,22 @@ static dom_stack_callback_T dom_source_renderer_callbacks[DOM_NODES] = {
|
|||||||
/* DOM_NODE_NOTATION */ render_dom_node_source,
|
/* DOM_NODE_NOTATION */ render_dom_node_source,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static dom_stack_callback_T dom_source_renderer_pop_callbacks[DOM_NODES] = {
|
||||||
|
/* */ NULL,
|
||||||
|
/* DOM_NODE_ELEMENT */ render_dom_element_end_source,
|
||||||
|
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||||
|
/* DOM_NODE_TEXT */ NULL,
|
||||||
|
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY */ NULL,
|
||||||
|
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||||
|
/* DOM_NODE_COMMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||||
|
/* DOM_NODE_NOTATION */ NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/* Shared multiplexor between renderers */
|
/* Shared multiplexor between renderers */
|
||||||
void
|
void
|
||||||
@ -694,19 +745,10 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
|||||||
struct dom_node *root;
|
struct dom_node *root;
|
||||||
struct dom_renderer renderer;
|
struct dom_renderer renderer;
|
||||||
struct conv_table *convert_table;
|
struct conv_table *convert_table;
|
||||||
dom_stack_callback_T *callbacks = dom_source_renderer_callbacks;
|
|
||||||
struct sgml_parser *parser;
|
struct sgml_parser *parser;
|
||||||
struct dom_stack stack;
|
|
||||||
|
|
||||||
assert(document->options.plain);
|
assert(document->options.plain);
|
||||||
|
|
||||||
parser = init_sgml_parser(cached, document);
|
|
||||||
if (!parser) return;
|
|
||||||
|
|
||||||
root = parse_sgml(parser, buffer);
|
|
||||||
done_sgml_parser(parser);
|
|
||||||
if (!root) return;
|
|
||||||
|
|
||||||
convert_table = get_convert_table(head, document->options.cp,
|
convert_table = get_convert_table(head, document->options.cp,
|
||||||
document->options.assume_cp,
|
document->options.assume_cp,
|
||||||
&document->cp,
|
&document->cp,
|
||||||
@ -714,11 +756,19 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
|||||||
document->options.hard_assume);
|
document->options.hard_assume);
|
||||||
|
|
||||||
init_dom_renderer(&renderer, document, buffer, convert_table);
|
init_dom_renderer(&renderer, document, buffer, convert_table);
|
||||||
init_dom_stack(&stack, &renderer, callbacks, 0);
|
|
||||||
|
|
||||||
document->bgcolor = document->options.default_bg;
|
document->bgcolor = document->options.default_bg;
|
||||||
|
|
||||||
walk_dom_nodes(&stack, root);
|
parser = init_sgml_parser(SGML_PARSER_STREAM, &renderer, cached,
|
||||||
|
document,
|
||||||
|
dom_source_renderer_push_callbacks,
|
||||||
|
dom_source_renderer_pop_callbacks);
|
||||||
|
if (!parser) return;
|
||||||
|
|
||||||
|
root = parse_sgml(parser, buffer);
|
||||||
|
done_sgml_parser(parser);
|
||||||
|
if (!root) return;
|
||||||
|
|
||||||
/* If there are no non-element nodes after the last element node make
|
/* If there are no non-element nodes after the last element node make
|
||||||
* sure that we flush to the end of the cache entry source including
|
* sure that we flush to the end of the cache entry source including
|
||||||
* the '>' of the last element tag if it has one. (bug 519) */
|
* the '>' of the last element tag if it has one. (bug 519) */
|
||||||
@ -727,5 +777,4 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
|||||||
}
|
}
|
||||||
|
|
||||||
done_dom_node(root);
|
done_dom_node(root);
|
||||||
done_dom_stack(&stack);
|
|
||||||
}
|
}
|
||||||
|
@ -45,19 +45,23 @@ realloc_dom_stack_state_objects(struct dom_stack *stack)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
init_dom_stack(struct dom_stack *stack, void *data,
|
init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
|
||||||
dom_stack_callback_T callbacks[DOM_NODES],
|
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||||
|
dom_stack_callback_T pop_callbacks[DOM_NODES],
|
||||||
size_t object_size)
|
size_t object_size)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
|
|
||||||
memset(stack, 0, sizeof(*stack));
|
memset(stack, 0, sizeof(*stack));
|
||||||
|
|
||||||
stack->data = data;
|
stack->parser = parser;
|
||||||
|
stack->renderer = renderer;
|
||||||
stack->object_size = object_size;
|
stack->object_size = object_size;
|
||||||
|
|
||||||
if (callbacks)
|
if (push_callbacks)
|
||||||
memcpy(stack->callbacks, callbacks, DOM_STACK_CALLBACKS_SIZE);
|
memcpy(stack->push_callbacks, push_callbacks, DOM_STACK_CALLBACKS_SIZE);
|
||||||
|
if (pop_callbacks)
|
||||||
|
memcpy(stack->pop_callbacks, pop_callbacks, DOM_STACK_CALLBACKS_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -94,7 +98,6 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
|||||||
|
|
||||||
if (stack->object_size) {
|
if (stack->object_size) {
|
||||||
unsigned char *state_objects;
|
unsigned char *state_objects;
|
||||||
size_t offset = stack->depth * stack->object_size;
|
|
||||||
|
|
||||||
state_objects = realloc_dom_stack_state_objects(stack);
|
state_objects = realloc_dom_stack_state_objects(stack);
|
||||||
if (!state_objects) {
|
if (!state_objects) {
|
||||||
@ -102,7 +105,7 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
state->data = (void *) &state_objects[offset];
|
state->depth = stack->depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
state->node = node;
|
state->node = node;
|
||||||
@ -111,9 +114,11 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
|
|||||||
* in the callbacks */
|
* in the callbacks */
|
||||||
stack->depth++;
|
stack->depth++;
|
||||||
|
|
||||||
callback = stack->callbacks[node->type];
|
callback = stack->push_callbacks[node->type];
|
||||||
if (callback) {
|
if (callback) {
|
||||||
node = callback(stack, node, state->data);
|
void *state_data = get_dom_stack_state_data(stack, state);
|
||||||
|
|
||||||
|
node = callback(stack, node, state_data);
|
||||||
|
|
||||||
/* If the callback returned NULL pop the state immediately */
|
/* If the callback returned NULL pop the state immediately */
|
||||||
if (!node) {
|
if (!node) {
|
||||||
@ -130,26 +135,26 @@ static int
|
|||||||
do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent)
|
do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent)
|
||||||
{
|
{
|
||||||
struct dom_stack_state *state;
|
struct dom_stack_state *state;
|
||||||
|
dom_stack_callback_T callback;
|
||||||
|
|
||||||
assert(stack);
|
assert(stack);
|
||||||
if (!dom_stack_has_parents(stack)) return 0;
|
if (!dom_stack_has_parents(stack)) return 0;
|
||||||
|
|
||||||
state = get_dom_stack_top(stack);
|
state = get_dom_stack_top(stack);
|
||||||
if (state->callback) {
|
callback = stack->pop_callbacks[state->node->type];
|
||||||
/* Pass the node we are popping to and _not_ the state->node */
|
if (callback) {
|
||||||
state->callback(stack, parent->node, state->data);
|
void *state_data = get_dom_stack_state_data(stack, state);
|
||||||
|
|
||||||
|
callback(stack, state->node, state_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
stack->depth--;
|
stack->depth--;
|
||||||
assert(stack->depth >= 0);
|
assert(stack->depth >= 0);
|
||||||
|
|
||||||
if (stack->object_size && state->data) {
|
if (stack->object_size) {
|
||||||
size_t offset = stack->depth * stack->object_size;
|
void *state_data = get_dom_stack_state_data(stack, state);
|
||||||
|
|
||||||
/* I tried to use item->data here but it caused a memory
|
memset(state_data, 0, stack->object_size);
|
||||||
* corruption bug on fm. This is also less trustworthy in that
|
|
||||||
* the state->data pointer could have been mangled. --jonas */
|
|
||||||
memset(&stack->state_objects[offset], 0, stack->object_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(state, 0, sizeof(*state));
|
memset(state, 0, sizeof(*state));
|
||||||
@ -170,16 +175,28 @@ void
|
|||||||
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
||||||
unsigned char *string, uint16_t length)
|
unsigned char *string, uint16_t length)
|
||||||
{
|
{
|
||||||
struct dom_stack_state *state, *parent;
|
struct dom_stack_state *state;
|
||||||
unsigned int pos;
|
|
||||||
|
|
||||||
if (!dom_stack_has_parents(stack)) return;
|
if (!dom_stack_has_parents(stack)) return;
|
||||||
|
|
||||||
parent = search_dom_stack(stack, type, string, length);
|
state = search_dom_stack(stack, type, string, length);
|
||||||
if (!parent) return;
|
if (state)
|
||||||
|
pop_dom_state(stack, type, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
|
||||||
|
struct dom_stack_state *target)
|
||||||
|
{
|
||||||
|
struct dom_stack_state *state;
|
||||||
|
unsigned int pos;
|
||||||
|
|
||||||
|
if (!target) return;
|
||||||
|
|
||||||
|
if (!dom_stack_has_parents(stack)) return;
|
||||||
|
|
||||||
foreachback_dom_state (stack, state, pos) {
|
foreachback_dom_state (stack, state, pos) {
|
||||||
if (do_pop_dom_node(stack, parent))
|
if (do_pop_dom_node(stack, target))
|
||||||
break;;
|
break;;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,14 +24,9 @@ struct dom_stack_state {
|
|||||||
/* The index (in the list above) which are currently being handled. */
|
/* The index (in the list above) which are currently being handled. */
|
||||||
size_t index;
|
size_t index;
|
||||||
|
|
||||||
/* A callback registered to be called when the node is popped. Used for
|
/* The depth of the state in the stack. This is amongst other things
|
||||||
* correctly highlighting ending elements (e.g. </a>). */
|
* used to get the state object data. */
|
||||||
dom_stack_callback_T callback;
|
unsigned int depth;
|
||||||
|
|
||||||
/* Parser specific data. For the SGML parser this holds DTD-oriented
|
|
||||||
* info about the node (recorded in struct sgml_node_info). E.g.
|
|
||||||
* whether an element node is optional. */
|
|
||||||
void *data;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The DOM stack is a convenient way to traverse DOM trees. Also it
|
/* The DOM stack is a convenient way to traverse DOM trees. Also it
|
||||||
@ -42,14 +37,20 @@ struct dom_stack {
|
|||||||
struct dom_stack_state *states;
|
struct dom_stack_state *states;
|
||||||
size_t depth;
|
size_t depth;
|
||||||
|
|
||||||
/* This is one big array of parser specific objects which will be
|
/* This is one big array of parser specific objects. */
|
||||||
* assigned to the data member of the individual dom_stack_states. */
|
/* The objects hold parser specific data. For the SGML parser this
|
||||||
|
* holds DTD-oriented info about the node (recorded in struct
|
||||||
|
* sgml_node_info). E.g. whether an element node is optional. */
|
||||||
unsigned char *state_objects;
|
unsigned char *state_objects;
|
||||||
size_t object_size;
|
size_t object_size;
|
||||||
|
|
||||||
/* Parser and document specific stuff */
|
/* Renderer specific callbacks for the streaming parser mode. */
|
||||||
dom_stack_callback_T callbacks[DOM_NODES];
|
dom_stack_callback_T push_callbacks[DOM_NODES];
|
||||||
void *data;
|
dom_stack_callback_T pop_callbacks[DOM_NODES];
|
||||||
|
|
||||||
|
/* Data specific to the parser and renderer. */
|
||||||
|
void *renderer;
|
||||||
|
void *parser;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define dom_stack_has_parents(nav) \
|
#define dom_stack_has_parents(nav) \
|
||||||
@ -66,6 +67,9 @@ get_dom_stack_state(struct dom_stack *stack, int top_offset)
|
|||||||
#define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1)
|
#define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1)
|
||||||
#define get_dom_stack_top(nav) get_dom_stack_state(nav, 0)
|
#define get_dom_stack_top(nav) get_dom_stack_state(nav, 0)
|
||||||
|
|
||||||
|
#define get_dom_stack_state_data(stack, state) \
|
||||||
|
((void *) &(stack)->state_objects[(state)->depth * (stack)->object_size])
|
||||||
|
|
||||||
/* The state iterators do not include the bottom state */
|
/* The state iterators do not include the bottom state */
|
||||||
|
|
||||||
#define foreach_dom_state(nav, item, pos) \
|
#define foreach_dom_state(nav, item, pos) \
|
||||||
@ -84,6 +88,7 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
|
|||||||
struct dom_stack_state *state;
|
struct dom_stack_state *state;
|
||||||
int pos;
|
int pos;
|
||||||
|
|
||||||
|
/* FIXME: Take node subtype and compare if non-zero or something. */
|
||||||
foreachback_dom_state (stack, state, pos) {
|
foreachback_dom_state (stack, state, pos) {
|
||||||
struct dom_node *parent = state->node;
|
struct dom_node *parent = state->node;
|
||||||
|
|
||||||
@ -102,8 +107,9 @@ search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
|
|||||||
/* The @object_size arg tells whether the stack should allocate objects for each
|
/* The @object_size arg tells whether the stack should allocate objects for each
|
||||||
* state to be assigned to the state's @data member. Zero means no state data should
|
* state to be assigned to the state's @data member. Zero means no state data should
|
||||||
* be allocated. */
|
* be allocated. */
|
||||||
void init_dom_stack(struct dom_stack *stack, void *data,
|
void init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
|
||||||
dom_stack_callback_T callbacks[DOM_NODES],
|
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||||
|
dom_stack_callback_T pop_callbacks[DOM_NODES],
|
||||||
size_t object_size);
|
size_t object_size);
|
||||||
void done_dom_stack(struct dom_stack *stack);
|
void done_dom_stack(struct dom_stack *stack);
|
||||||
|
|
||||||
@ -118,6 +124,11 @@ void pop_dom_node(struct dom_stack *stack);
|
|||||||
void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
|
||||||
unsigned char *string, uint16_t length);
|
unsigned char *string, uint16_t length);
|
||||||
|
|
||||||
|
/* Pop all stack states until a specific state is reached. */
|
||||||
|
void
|
||||||
|
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
|
||||||
|
struct dom_stack_state *target);
|
||||||
|
|
||||||
/* Visit each node in the tree rooted at @root pre-order */
|
/* Visit each node in the tree rooted at @root pre-order */
|
||||||
void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root);
|
void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root);
|
||||||
|
|
||||||
|
@ -37,66 +37,7 @@ static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static struct dom_node *
|
|
||||||
add_html_element_end_node(struct dom_stack *stack, struct dom_node *node, void *data)
|
|
||||||
{
|
|
||||||
struct sgml_parser *parser = stack->data;
|
|
||||||
struct dom_node *parent;
|
|
||||||
struct scanner_token *token;
|
|
||||||
|
|
||||||
assert(stack && parser && node);
|
|
||||||
assert(dom_stack_has_parents(stack));
|
|
||||||
|
|
||||||
/* Are we the actual node being popped? */
|
|
||||||
if (node != get_dom_stack_top(stack)->node)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
parent = get_dom_stack_parent(stack)->node;
|
|
||||||
token = get_scanner_token(&parser->scanner);
|
|
||||||
|
|
||||||
assertm(token, "No token found in callback");
|
|
||||||
assertm(token->type == SGML_TOKEN_ELEMENT_END, "Bad token found in callback");
|
|
||||||
|
|
||||||
if (!token->length) return NULL;
|
|
||||||
|
|
||||||
return add_dom_element(parent, token->string, token->length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* TODO: We need to handle ascending of <br> and "<p>text1<p>text2" using data
|
|
||||||
* from sgml_node_info. */
|
|
||||||
static struct dom_node *
|
|
||||||
add_html_element_node(struct dom_stack *stack, struct dom_node *node, void *data)
|
|
||||||
{
|
|
||||||
struct sgml_parser *parser = stack->data;
|
|
||||||
|
|
||||||
assert(stack && node);
|
|
||||||
assert(dom_stack_has_parents(stack));
|
|
||||||
|
|
||||||
/* TODO: Move to SGML parser main loop and disguise these element ends
|
|
||||||
* in some internal processing instruction. */
|
|
||||||
if (parser->flags & SGML_PARSER_ADD_ELEMENT_ENDS)
|
|
||||||
get_dom_stack_top(stack)->callback = add_html_element_end_node;
|
|
||||||
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
struct sgml_info sgml_html_info = {
|
struct sgml_info sgml_html_info = {
|
||||||
html_attributes,
|
html_attributes,
|
||||||
html_elements,
|
html_elements,
|
||||||
{
|
|
||||||
/* */ NULL,
|
|
||||||
/* DOM_NODE_ELEMENT */ add_html_element_node,
|
|
||||||
/* DOM_NODE_ATTRIBUTE */ NULL,
|
|
||||||
/* DOM_NODE_TEXT */ NULL,
|
|
||||||
/* DOM_NODE_CDATA_SECTION */ NULL,
|
|
||||||
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
|
||||||
/* DOM_NODE_ENTITY */ NULL,
|
|
||||||
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
|
||||||
/* DOM_NODE_COMMENT */ NULL,
|
|
||||||
/* DOM_NODE_DOCUMENT */ NULL,
|
|
||||||
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
|
||||||
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
|
||||||
/* DOM_NODE_NOTATION */ NULL,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
@ -41,23 +41,27 @@ add_sgml_document(struct dom_stack *stack, struct uri *uri)
|
|||||||
static inline struct dom_node *
|
static inline struct dom_node *
|
||||||
add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
|
add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
|
||||||
{
|
{
|
||||||
struct sgml_parser *parser = stack->data;
|
struct sgml_parser *parser = stack->parser;
|
||||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||||
struct dom_stack_state *state;
|
struct dom_stack_state *state;
|
||||||
struct sgml_parser_state *pstate;
|
struct sgml_parser_state *pstate;
|
||||||
struct dom_node *node;
|
struct dom_node *node;
|
||||||
|
struct sgml_node_info *node_info;
|
||||||
|
|
||||||
node = add_dom_element(parent, token->string, token->length);
|
node = add_dom_element(parent, token->string, token->length);
|
||||||
|
if (!node) return NULL;
|
||||||
|
|
||||||
if (!node || !push_dom_node(stack, node))
|
node_info = get_sgml_node_info(parser->info->elements, node);
|
||||||
|
node->data.element.type = node_info->type;
|
||||||
|
|
||||||
|
if (!push_dom_node(stack, node))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
state = get_dom_stack_top(stack);
|
state = get_dom_stack_top(stack);
|
||||||
assert(node == state->node && state->data);
|
assert(node == state->node);
|
||||||
|
|
||||||
pstate = state->data;
|
pstate = get_dom_stack_state_data(stack, state);
|
||||||
pstate->info = get_sgml_node_info(parser->info->elements, node);
|
pstate->info = node_info;
|
||||||
node->data.element.type = pstate->info->type;
|
|
||||||
|
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
@ -67,7 +71,7 @@ static inline void
|
|||||||
add_sgml_attribute(struct dom_stack *stack,
|
add_sgml_attribute(struct dom_stack *stack,
|
||||||
struct scanner_token *token, struct scanner_token *valtoken)
|
struct scanner_token *token, struct scanner_token *valtoken)
|
||||||
{
|
{
|
||||||
struct sgml_parser *parser = stack->data;
|
struct sgml_parser *parser = stack->parser;
|
||||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||||
unsigned char *value = valtoken ? valtoken->string : NULL;
|
unsigned char *value = valtoken ? valtoken->string : NULL;
|
||||||
uint16_t valuelen = valtoken ? valtoken->length : 0;
|
uint16_t valuelen = valtoken ? valtoken->length : 0;
|
||||||
@ -77,9 +81,6 @@ add_sgml_attribute(struct dom_stack *stack,
|
|||||||
node = add_dom_attribute(parent, token->string, token->length,
|
node = add_dom_attribute(parent, token->string, token->length,
|
||||||
value, valuelen);
|
value, valuelen);
|
||||||
|
|
||||||
if (!node || !push_dom_node(stack, node))
|
|
||||||
return;
|
|
||||||
|
|
||||||
info = get_sgml_node_info(parser->info->attributes, node);
|
info = get_sgml_node_info(parser->info->attributes, node);
|
||||||
|
|
||||||
node->data.attribute.type = info->type;
|
node->data.attribute.type = info->type;
|
||||||
@ -89,6 +90,9 @@ add_sgml_attribute(struct dom_stack *stack,
|
|||||||
if (valtoken && valtoken->type == SGML_TOKEN_STRING)
|
if (valtoken && valtoken->type == SGML_TOKEN_STRING)
|
||||||
node->data.attribute.quoted = 1;
|
node->data.attribute.quoted = 1;
|
||||||
|
|
||||||
|
if (!node || !push_dom_node(stack, node))
|
||||||
|
return;
|
||||||
|
|
||||||
pop_dom_node(stack);
|
pop_dom_node(stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,8 +247,18 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
|
|||||||
if (!token->length) {
|
if (!token->length) {
|
||||||
pop_dom_node(stack);
|
pop_dom_node(stack);
|
||||||
} else {
|
} else {
|
||||||
pop_dom_nodes(stack, DOM_NODE_ELEMENT,
|
struct dom_stack_state *state;
|
||||||
token->string, token->length);
|
|
||||||
|
state = search_dom_stack(stack, DOM_NODE_ELEMENT,
|
||||||
|
token->string, token->length);
|
||||||
|
if (state) {
|
||||||
|
struct sgml_parser_state *pstate;
|
||||||
|
|
||||||
|
pstate = get_dom_stack_state_data(stack, state);
|
||||||
|
copy_struct(&pstate->end_token, token);
|
||||||
|
|
||||||
|
pop_dom_state(stack, DOM_NODE_ELEMENT, state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
skip_scanner_token(scanner);
|
skip_scanner_token(scanner);
|
||||||
break;
|
break;
|
||||||
@ -293,7 +307,10 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
|
|||||||
|
|
||||||
|
|
||||||
struct sgml_parser *
|
struct sgml_parser *
|
||||||
init_sgml_parser(struct cache_entry *cached, struct document *document)
|
init_sgml_parser(enum sgml_parser_type type, void *renderer,
|
||||||
|
struct cache_entry *cached, struct document *document,
|
||||||
|
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||||
|
dom_stack_callback_T pop_callbacks[DOM_NODES])
|
||||||
{
|
{
|
||||||
size_t obj_size = sizeof(struct sgml_parser_state);
|
size_t obj_size = sizeof(struct sgml_parser_state);
|
||||||
struct sgml_parser *parser;
|
struct sgml_parser *parser;
|
||||||
@ -301,14 +318,13 @@ init_sgml_parser(struct cache_entry *cached, struct document *document)
|
|||||||
parser = mem_calloc(1, sizeof(*parser));
|
parser = mem_calloc(1, sizeof(*parser));
|
||||||
if (!parser) return NULL;
|
if (!parser) return NULL;
|
||||||
|
|
||||||
|
parser->type = type;
|
||||||
parser->document = document;
|
parser->document = document;
|
||||||
parser->cache_entry = cached;
|
parser->cache_entry = cached;
|
||||||
parser->info = &sgml_html_info;
|
parser->info = &sgml_html_info;
|
||||||
|
|
||||||
init_dom_stack(&parser->stack, parser, parser->info->callbacks, obj_size);
|
init_dom_stack(&parser->stack, parser, renderer,
|
||||||
|
push_callbacks, pop_callbacks, obj_size);
|
||||||
if (document->options.plain)
|
|
||||||
parser->flags |= SGML_PARSER_ADD_ELEMENT_ENDS;
|
|
||||||
|
|
||||||
return parser;
|
return parser;
|
||||||
}
|
}
|
||||||
|
@ -11,13 +11,21 @@ struct cache_entry;
|
|||||||
struct document;
|
struct document;
|
||||||
struct string;
|
struct string;
|
||||||
|
|
||||||
enum sgml_parser_flags {
|
enum sgml_parser_type {
|
||||||
SGML_PARSER_ADD_ELEMENT_ENDS = 1,
|
/* The first one is a DOM tree builder. */
|
||||||
|
SGML_PARSER_TREE,
|
||||||
|
/* The second one will simply push nodes on the stack, not building a
|
||||||
|
* DOM tree. This interface is similar to that of SAX (Simple API for
|
||||||
|
* XML) where events are fired when nodes are entered and exited. It is
|
||||||
|
* useful when you are not actually interested in the DOM tree, but can
|
||||||
|
* do all processing in a stream-like manner, such as when highlighting
|
||||||
|
* HTML code. */
|
||||||
|
SGML_PARSER_STREAM,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sgml_parser {
|
struct sgml_parser {
|
||||||
/* The parser flags controls what gets added to the DOM tree */
|
enum sgml_parser_type type;
|
||||||
enum sgml_parser_flags flags;
|
|
||||||
struct sgml_info *info;
|
struct sgml_info *info;
|
||||||
|
|
||||||
struct document *document;
|
struct document *document;
|
||||||
@ -30,10 +38,16 @@ struct sgml_parser {
|
|||||||
|
|
||||||
struct sgml_parser_state {
|
struct sgml_parser_state {
|
||||||
struct sgml_node_info *info;
|
struct sgml_node_info *info;
|
||||||
|
/* This is used by the DOM source renderer for highlighting the
|
||||||
|
* end-tag of an element. */
|
||||||
|
struct scanner_token end_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sgml_parser *
|
struct sgml_parser *
|
||||||
init_sgml_parser(struct cache_entry *cached, struct document *document);
|
init_sgml_parser(enum sgml_parser_type type, void *renderer,
|
||||||
|
struct cache_entry *cached, struct document *document,
|
||||||
|
dom_stack_callback_T push_callbacks[DOM_NODES],
|
||||||
|
dom_stack_callback_T pop_callbacks[DOM_NODES]);
|
||||||
|
|
||||||
void done_sgml_parser(struct sgml_parser *parser);
|
void done_sgml_parser(struct sgml_parser *parser);
|
||||||
|
|
||||||
|
@ -76,7 +76,6 @@ get_sgml_node_info(struct sgml_node_info list[], struct dom_node *node)
|
|||||||
struct sgml_info {
|
struct sgml_info {
|
||||||
struct sgml_node_info *attributes;
|
struct sgml_node_info *attributes;
|
||||||
struct sgml_node_info *elements;
|
struct sgml_node_info *elements;
|
||||||
dom_stack_callback_T callbacks[DOM_NODES];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user