mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
Add support for nested calls to parse_sgml()
It introduces a secondary stack for the SGML parser which records each parsing in progress (stuff like the scanner and the depth where the parsing began). This should make it possible to eventually call parse_sgml() with the output created from ECMAScript's document.write() function or even output from processing instructions (for example <?php code ?> ;).
This commit is contained in:
parent
cb90dcd58c
commit
dda6064b47
@ -311,6 +311,34 @@ parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
|
{
|
||||||
|
struct sgml_parser *parser = get_sgml_parser(stack);
|
||||||
|
struct sgml_parsing_state *parsing = data;
|
||||||
|
unsigned char *source = node->string.string;
|
||||||
|
unsigned char *end = source + node->string.length;
|
||||||
|
|
||||||
|
parsing->depth = parser->stack.depth;
|
||||||
|
get_dom_stack_top(&parser->stack)->immutable = 1;
|
||||||
|
init_scanner(&parsing->scanner, &sgml_scanner_info, source, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
|
{
|
||||||
|
struct sgml_parser *parser = get_sgml_parser(stack);
|
||||||
|
struct sgml_parsing_state *parsing = data;
|
||||||
|
|
||||||
|
/* Pop the stack back to the state it was in. This includes cleaning
|
||||||
|
* away even immutable states left on the stack. */
|
||||||
|
while (parsing->depth < parser->stack.depth) {
|
||||||
|
get_dom_stack_top(&parser->stack)->immutable = 0;
|
||||||
|
pop_dom_node(&parser->stack);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(parsing->depth == parser->stack.depth);
|
||||||
|
}
|
||||||
|
|
||||||
static struct dom_stack_context_info sgml_parser_context_info = {
|
static struct dom_stack_context_info sgml_parser_context_info = {
|
||||||
/* Object size: */ sizeof(struct sgml_parser_state),
|
/* Object size: */ sizeof(struct sgml_parser_state),
|
||||||
@ -348,6 +376,42 @@ static struct dom_stack_context_info sgml_parser_context_info = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct dom_stack_context_info sgml_parsing_context_info = {
|
||||||
|
/* Object size: */ sizeof(struct sgml_parsing_state),
|
||||||
|
/* Push: */
|
||||||
|
{
|
||||||
|
/* */ NULL,
|
||||||
|
/* DOM_NODE_ELEMENT */ NULL,
|
||||||
|
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||||
|
/* DOM_NODE_TEXT */ sgml_parsing_push,
|
||||||
|
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY */ NULL,
|
||||||
|
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||||
|
/* DOM_NODE_COMMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||||
|
/* DOM_NODE_NOTATION */ NULL,
|
||||||
|
},
|
||||||
|
/* Pop: */
|
||||||
|
{
|
||||||
|
/* */ NULL,
|
||||||
|
/* DOM_NODE_ELEMENT */ NULL,
|
||||||
|
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||||
|
/* DOM_NODE_TEXT */ sgml_parsing_pop,
|
||||||
|
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||||
|
/* DOM_NODE_ENTITY */ NULL,
|
||||||
|
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||||
|
/* DOM_NODE_COMMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||||
|
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||||
|
/* DOM_NODE_NOTATION */ NULL,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct sgml_parser *
|
struct sgml_parser *
|
||||||
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
||||||
struct uri *uri)
|
struct uri *uri)
|
||||||
@ -370,6 +434,10 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
|||||||
* and feed document.write() data back to the parser. */
|
* and feed document.write() data back to the parser. */
|
||||||
add_dom_stack_context(&parser->stack, parser, &sgml_parser_context_info);
|
add_dom_stack_context(&parser->stack, parser, &sgml_parser_context_info);
|
||||||
|
|
||||||
|
/* Don't keep the 'fake' text nodes that hold the parsing data. */
|
||||||
|
init_dom_stack(&parser->parsing, 0);
|
||||||
|
add_dom_stack_context(&parser->parsing, parser, &sgml_parsing_context_info);
|
||||||
|
|
||||||
return parser;
|
return parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -377,43 +445,45 @@ void
|
|||||||
done_sgml_parser(struct sgml_parser *parser)
|
done_sgml_parser(struct sgml_parser *parser)
|
||||||
{
|
{
|
||||||
done_dom_stack(&parser->stack);
|
done_dom_stack(&parser->stack);
|
||||||
|
done_dom_stack(&parser->parsing);
|
||||||
done_uri(parser->uri);
|
done_uri(parser->uri);
|
||||||
mem_free(parser);
|
mem_free(parser);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIXME: Make it possible to push variable number of strings (even nested
|
static struct sgml_parsing_state *
|
||||||
* while parsing another string) so that we can feed back output of stuff
|
init_sgml_parsing_state(struct sgml_parser *parser, struct string *buffer)
|
||||||
* like ECMAScripts document.write(). */
|
{
|
||||||
|
struct dom_stack_state *state;
|
||||||
|
struct dom_node *node;
|
||||||
|
|
||||||
|
node = init_dom_node(DOM_NODE_TEXT, buffer->source, buffer->length);
|
||||||
|
if (!node || !push_dom_node(&parser->parsing, node))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
state = get_dom_stack_top(&parser->parsing);
|
||||||
|
|
||||||
|
return get_dom_stack_state_data(parser->parsing.contexts, state);
|
||||||
|
}
|
||||||
|
|
||||||
struct dom_node *
|
struct dom_node *
|
||||||
parse_sgml(struct sgml_parser *parser, struct string *buffer)
|
parse_sgml(struct sgml_parser *parser, struct string *buffer)
|
||||||
{
|
{
|
||||||
unsigned char *source = buffer->source;
|
struct sgml_parsing_state *parsing;
|
||||||
unsigned char *end = source + buffer->length;
|
|
||||||
size_t depth;
|
|
||||||
|
|
||||||
if (!parser->root) {
|
if (!parser->root) {
|
||||||
parser->root = add_sgml_document(&parser->stack, parser->uri);
|
parser->root = add_sgml_document(&parser->stack, parser->uri);
|
||||||
if (!parser->root)
|
if (!parser->root)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
get_dom_stack_top(&parser->stack)->immutable = 1;
|
get_dom_stack_top(&parser->stack)->immutable = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
init_scanner(&parser->scanner, &sgml_scanner_info, source, end);
|
parsing = init_sgml_parsing_state(parser, buffer);
|
||||||
|
if (!parsing) return NULL;
|
||||||
|
|
||||||
/* FIXME: Make parse_sgml_document() return an error code. */
|
/* FIXME: Make parse_sgml_document() return an error code. */
|
||||||
depth = parser->stack.depth;
|
parse_sgml_document(&parser->stack, &parsing->scanner);
|
||||||
parse_sgml_document(&parser->stack, &parser->scanner);
|
|
||||||
|
|
||||||
/* Pop the stack back to the state it was in. This includes cleaning
|
pop_dom_node(&parser->parsing);
|
||||||
* away even immutable states left on the stack. */
|
|
||||||
while (depth < parser->stack.depth) {
|
|
||||||
get_dom_stack_top(&parser->stack)->immutable = 0;
|
|
||||||
pop_dom_node(&parser->stack);
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(depth == parser->stack.depth);
|
|
||||||
|
|
||||||
/* FIXME: Return the 'bottom' node that was added by the parser. */
|
|
||||||
return parser->root;
|
return parser->root;
|
||||||
}
|
}
|
||||||
|
@ -22,16 +22,26 @@ enum sgml_parser_type {
|
|||||||
SGML_PARSER_STREAM,
|
SGML_PARSER_STREAM,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sgml_parser {
|
/* This holds info about a chunk of text being parsed. The SGML parser uses
|
||||||
enum sgml_parser_type type;
|
* these to keep track of possible nested calls to parse_sgml(). This can be
|
||||||
|
* used to feed output of stuff like ECMAScript's document.write() from
|
||||||
struct sgml_info *info;
|
* <script>-elements back to the SGML parser. */
|
||||||
|
struct sgml_parsing_state {
|
||||||
struct uri *uri;
|
|
||||||
struct dom_node *root;
|
|
||||||
|
|
||||||
struct scanner scanner;
|
struct scanner scanner;
|
||||||
struct dom_stack stack;
|
struct dom_node *node;
|
||||||
|
size_t depth;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct sgml_parser {
|
||||||
|
enum sgml_parser_type type; /* Stream or tree */
|
||||||
|
|
||||||
|
struct sgml_info *info; /* Backend dependent info */
|
||||||
|
|
||||||
|
struct uri *uri; /* The URI of the DOM document */
|
||||||
|
struct dom_node *root; /* The document root node */
|
||||||
|
|
||||||
|
struct dom_stack stack; /* A stack for tracking parsed nodes */
|
||||||
|
struct dom_stack parsing; /* Used for tracking parsing states */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sgml_parser_state {
|
struct sgml_parser_state {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user