1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-01-03 14:57:44 -05:00

Merge with git+ssh://pasky.or.cz/srv/git/elinks.git

This commit is contained in:
2006-01-16 16:18:52 +01:00
commit 178c50cf40
16 changed files with 621 additions and 135 deletions

View File

@ -12,6 +12,9 @@ docdir = $(datadir)/doc
HTML_DIR = $(DESTDIR)$(docdir)/$(PACKAGE)/html
PDF_DIR = $(DESTDIR)$(docdir)/$(PACKAGE)/pdf
ASCIIDOC_CONF = $(srcdir)asciidoc.conf
ASCIIDOC_FLAGS = -f $(ASCIIDOC_CONF) -a "builddir=$(CURDIR)/"
#############################################################################
# Build files
@ -108,7 +111,7 @@ quiet_cmd_help2doc = ' [$(LINK_COLOR)HELP2DOC$(END_COLOR)] $(RELPATH)$@'
cmd_help2doc = $(LOCALES) $(HELP2DOC) $(ELINKS) $@ > $@
quiet_cmd_conf2doc = ' [$(LINK_COLOR)CONF2DOC$(END_COLOR)] $(RELPATH)$@'
cmd_conf2doc = $(LOCALES) $(CONF2DOC) > $@
cmd_conf2doc = $(LOCALES) $(CONF2DOC) $(FEATURES) > $@
quiet_cmd_keys2doc = ' [$(LINK_COLOR)KEYS2DOC$(END_COLOR)] $(RELPATH)$@'
cmd_keys2doc = $(LOCALES) $(KEYS2DOC) $(KBDBIND) $@ > $@
@ -149,25 +152,27 @@ api: $(patsubst %.txt,%.html,$(API_TXT))
cmd_jw = $(JW) -b $(2) $<
quiet_cmd_xmlto = ' [$(LINK_COLOR)XMLTO$(END_COLOR)] $(RELPATH)$@'
cmd_xmlto = $(XMLTO) $(3) $(2) $<
cmd_xmlto = $(XMLTO) -o $(call outdir) $(3) $(2) $<
quiet_cmd_pod2html = ' [$(LINK_COLOR)POD2HTML$(END_COLOR)] $(RELPATH)$@'
cmd_pod2html = $(POD2HTML) --outfile=$@ < $<
quiet_cmd_asciidoc = ' [$(LINK_COLOR)ASCIIDOC$(END_COLOR)] $(RELPATH)$@'
cmd_asciidoc = $(ASCIIDOC) -f asciidoc.conf -b $(2) -d $(call doctype,$<) -o $@ $<
cmd_asciidoc = $(ASCIIDOC) $(ASCIIDOC_FLAGS) -b $(call backend) -d $(call doctype) -o $@ $<
# Based on filename in $(1) find out asciidoc doctype.
doctype = $(if $(findstring .1.,$(1)),manpage,$(if $(findstring .5.,$(1)),manpage,book))
# Based on $@ find out asciidoc doctype or backend + xmlto output dir.
doctype = $(if $(findstring .1.,$@)$(findstring .5.,$@),manpage,book)
backend = $(if $(findstring .xml,$@),docbook,xhtml11)
outdir = $(if $(findstring -chunked,$@),$@,.)
# Loosely track dependencies via asciidoc includes.
asciidoc_dep = sed -n 's@include::\(.*\)\[.*@$@: \1@p' < $< > .deps/$(@F).asciidoc
asciidoc_dep = sed -n 's/[{]builddir}//g;s@include::\(.*\)\[.*@$@: $< \1@p' < $< > .deps/$(@F).asciidoc
-include .deps/*.asciidoc
# Do a little post-processing of man pages. Inserting title headers and date.
MAN_DATE = $(shell date -I)
man_desc = `sed -n 's/:Description:\s*\(.*\)/\1/p' < $(subst .xml,.txt,$<)`
man_desc = `sed -n 's/:Description:\s*\(.*\)/\1/p' < $(srcdir)$(subst .xml,.txt,$(<F))`
man_hack = sed "s/^\(\.TH \"ELINKS[^\"]*\" [0-9] \).*/\1\"$(1)\" \"$(MAN_DATE)\" \"$(1)\"/" < $@ | \
sed "s/@squote@/\\\\'/g" > $@.tmp && mv $@.tmp $@
@ -175,11 +180,11 @@ man_hack = sed "s/^\(\.TH \"ELINKS[^\"]*\" [0-9] \).*/\1\"$(1)\" \"$(MAN_DATE)\"
#############################################################################
# Build recipes
%.html: %.txt asciidoc.conf
%.html: %.txt $(ASCIIDOC_CONF)
$(call cmd,asciidoc,xhtml11)
@-$(call asciidoc_dep)
%.xml: %.txt asciidoc.conf
%.xml: %.txt $(ASCIIDOC_CONF)
$(call cmd,asciidoc,docbook)
@-$(call asciidoc_dep)
@ -192,7 +197,7 @@ man_hack = sed "s/^\(\.TH \"ELINKS[^\"]*\" [0-9] \).*/\1\"$(1)\" \"$(MAN_DATE)\"
@$(call man_hack,$(call man_desc))
%.html-chunked: %.xml
$(call cmd,xmlto,html,-o $@)
$(call cmd,xmlto,html)
%.pdf: %.xml
$(call cmd,jw,pdf)

View File

@ -35,7 +35,7 @@ Most options can be set in the user interface or config file, so usually you
do not need to care about them. Note that this list is roughly equivalent to
the output of running ELinks with the option `--long-help`.
include::option-command.txt[]
include::{builddir}option-command.txt[]
ENVIRONMENT VARIABLES
---------------------

View File

@ -56,7 +56,7 @@ Some sample settings:
OPTIONS
-------
include::option-config.txt[]
include::{builddir}option-config.txt[]
SEE ALSO
--------

View File

@ -93,7 +93,7 @@ FIXME:
KEYMAP ACTIONS
--------------
include::keymap-actions.txt[]
include::{builddir}keymap-actions.txt[]
DEFAULT BINDINGS
----------------
@ -101,7 +101,7 @@ DEFAULT BINDINGS
The default bindings are shown below. Any bindings in `~/.elinks/elinks.conf`
will override these.
include::keymap-defaults.txt[]
include::{builddir}keymap-defaults.txt[]
AUTHOR
------

View File

@ -71,6 +71,6 @@ include::small.txt[]
include::ecmascript.txt[]
include::features.txt[]
include::{builddir}features.txt[]
endif::installation-webpage[]

View File

@ -4,6 +4,7 @@
# Copyright (c) Jonas Fonseca <fonseca@diku.dk>, 2005
#
CONFFILE="$1"
TMPFILE=$(mktemp import-features.conf.XXXXXX) || exit 1
strip_comment()
@ -53,7 +54,7 @@ __END__
fi
}
cat ../features.conf | while read line; do
cat "$CONFFILE" | while read line; do
case "$line" in
"### "*)
print_section

View File

@ -22,6 +22,7 @@
#include "document/document.h"
#include "document/dom/renderer.h"
#include "document/renderer.h"
#include "dom/configuration.h"
#include "dom/scanner.h"
#include "dom/sgml/parser.h"
#include "dom/sgml/rss/rss.h"
@ -500,7 +501,7 @@ render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *no
}
#endif
static void
enum dom_stack_code
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -513,15 +514,15 @@ render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *dat
|| node->type == DOM_NODE_CDATA_SECTION
|| node->type == DOM_NODE_COMMENT)) {
render_dom_node_enhanced_text(renderer, node);
return;
}
} else
#endif
render_dom_node_text(renderer, &renderer->styles[node->type], node);
render_dom_node_text(renderer, &renderer->styles[node->type], node);
return DOM_STACK_CODE_OK;
}
/* This callback is also used for rendering processing instruction nodes. */
static void
static enum dom_stack_code
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -529,9 +530,11 @@ render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *
assert(node && renderer && renderer->document);
render_dom_node_text(renderer, &renderer->styles[node->type], node);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -544,7 +547,7 @@ render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, vo
assert(node && renderer && renderer->document);
if (!string || !length)
return;
return DOM_STACK_CODE_OK;
if (check_dom_node_source(renderer, string, length)) {
render_dom_flush(renderer, string);
@ -553,9 +556,11 @@ render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, vo
}
render_dom_text(renderer, &renderer->styles[node->type], string, length);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -620,9 +625,11 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
render_dom_text(renderer, template, value, valuelen);
}
}
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -639,9 +646,11 @@ render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *da
}
render_dom_node_text(renderer, &renderer->styles[node->type], node);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -652,6 +661,8 @@ render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *da
if (check_dom_node_source(renderer, renderer->position, 0)) {
render_dom_flush(renderer, renderer->end);
}
return DOM_STACK_CODE_OK;
}
static struct dom_stack_context_info dom_source_renderer_context_info = {
@ -693,7 +704,10 @@ static struct dom_stack_context_info dom_source_renderer_context_info = {
/* DOM RSS Renderer */
static void
/* DOM configuration flags handed to add_dom_config_normalizer() when an RSS
 * document is rendered: whitespace normalization plus character
 * normalization. */
#define RSS_CONFIG_FLAGS \
(DOM_CONFIG_NORMALIZE_WHITESPACE | DOM_CONFIG_NORMALIZE_CHARACTERS)
enum dom_stack_code
dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -735,9 +749,11 @@ dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
renderer->node = node;
}
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
@ -776,74 +792,21 @@ dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
default:
break;
}
}
static void
dom_rss_push_content(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
unsigned char *string = node->string.string;
int length = node->string.length;
assert(node && renderer && renderer->document);
if (!renderer->node)
return;
if (node->type == DOM_NODE_ENTITY_REFERENCE) {
string -= 1;
length += 2;
}
if (!is_dom_string_set(&renderer->text)) {
init_dom_string(&renderer->text, string, length);
} else {
add_to_dom_string(&renderer->text, string, length);
}
}
static struct dom_string *
get_rss_node_text(struct dom_node *node)
{
struct dom_node *child;
int index;
if (!node->data.element.children)
return NULL;
foreach_dom_node (node->data.element.children, child, index) {
if (child->type == DOM_NODE_TEXT)
return &child->string;
}
return NULL;
}
static struct dom_node *
get_rss_child(struct dom_node *parent, enum rss_element_type type)
{
struct dom_node *node;
int index;
if (!parent->data.element.children)
return NULL;
foreach_dom_node (parent->data.element.children, node, index) {
if (node->type == DOM_NODE_ELEMENT
&& type == node->data.element.type)
return node;
}
return NULL;
return DOM_STACK_CODE_OK;
}
static struct dom_string *
get_rss_text(struct dom_node *node, enum rss_element_type type)
{
node = get_rss_child(node, type);
node = get_dom_node_child(node, DOM_NODE_ELEMENT, type);
return node ? get_rss_node_text(node) : NULL;
if (!node) return NULL;
node = get_dom_node_child(node, DOM_NODE_TEXT, 0);
return node ? &node->string: NULL;
}
static void
@ -901,13 +864,13 @@ render_rss_item(struct dom_renderer *renderer, struct dom_node *item)
}
}
static void
enum dom_stack_code
dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
{
struct dom_renderer *renderer = stack->current->data;
if (!renderer->channel)
return;
return DOM_STACK_CODE_OK;
render_rss_item(renderer, renderer->channel);
@ -927,6 +890,8 @@ dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
mem_free_if(renderer->items);
done_dom_node(root);
return DOM_STACK_CODE_OK;
}
@ -937,9 +902,9 @@ static struct dom_stack_context_info dom_rss_renderer_context_info = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_rss_push_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_rss_push_content,
/* DOM_NODE_CDATA_SECTION */ dom_rss_push_content,
/* DOM_NODE_ENTITY_REFERENCE */ dom_rss_push_content,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
@ -1028,6 +993,7 @@ render_dom_document(struct cache_entry *cached, struct document *document,
} else if (doctype == SGML_DOCTYPE_RSS) {
add_dom_stack_context(&parser->stack, &renderer,
&dom_rss_renderer_context_info);
add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS);
}
/* FIXME: When rendering this way we don't really care about the code.

View File

@ -2,7 +2,7 @@ top_builddir=../..
include $(top_builddir)/Makefile.config
SUBDIRS = css sgml
OBJS = node.o select.o stack.o scanner.o
OBJS = configuration.o node.o select.o stack.o scanner.o
SUBDIRS-$(CONFIG_DEBUG) += test

300
src/dom/configuration.c Normal file
View File

@ -0,0 +1,300 @@
/* DOM Configuration */
#include "elinks.h"
#include "dom/configuration.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "dom/string.h"
static enum dom_stack_code
normalize_text_node_whitespace(struct dom_node *node)
{
unsigned char buf[256];
struct dom_string string = INIT_DOM_STRING(NULL, 0);
int count = 0, i = 0;
unsigned char *text = node->string.string;
assert(node->type == DOM_NODE_TEXT);
while (i < node->string.length) {
int j;
for (j = 0; j < sizeof(buf) && i < node->string.length; i++) {
unsigned char data = text[i];
if (isspace(data)) {
if (count == 1)
continue;
data = ' ';
count = 1;
} else {
count = 0;
}
buf[j++] = data;
}
if (!add_to_dom_string(&string, buf, j)) {
done_dom_string(&string);
return DOM_STACK_CODE_ERROR_MEM_ALLOC;
}
}
if (node->data.text.allocated)
done_dom_string(&node->string);
set_dom_string(&node->string, string.string, string.length);
node->data.text.allocated = 1;
return DOM_STACK_CODE_OK;
}
static enum dom_stack_code
append_node_text(struct dom_config *config, struct dom_node *node)
{
struct dom_node *prev = get_dom_node_prev(node);
size_t length;
struct dom_string dest;
struct dom_string src;
int error = 0;
copy_struct(&src, &node->string);
if (!prev || prev->type != DOM_NODE_TEXT) {
/* Preserve text nodes with no one to append to. */
if (node->type == DOM_NODE_TEXT)
return DOM_STACK_CODE_OK;
prev = NULL;
set_dom_string(&dest, NULL, 0);
} else {
if (prev->data.text.allocated) {
copy_struct(&dest, &prev->string);
} else {
set_dom_string(&dest, NULL, 0);
if (!add_to_dom_string(&dest, prev->string.string, prev->string.length))
return DOM_STACK_CODE_ERROR_MEM_ALLOC;
set_dom_string(&prev->string, dest.string, dest.length);
prev->data.text.allocated = 1;
}
}
length = dest.length;
switch (node->type) {
case DOM_NODE_CDATA_SECTION:
case DOM_NODE_TEXT:
if (!add_to_dom_string(&dest, src.string, src.length))
error = 1;
break;
case DOM_NODE_ENTITY_REFERENCE:
/* FIXME: Until we will have uniform encoding at this point
* (UTF-8) we just add the entity reference unexpanded assuming
* that convert_string() will eventually do the work of
* expanding it. */
if (!add_to_dom_string(&dest, "&", 1)
|| !add_to_dom_string(&dest, src.string, src.length)
|| !add_to_dom_string(&dest, ";", 1)) {
error = 1;
}
break;
default:
INTERNAL("Cannot append from node %d", node->type);
}
if (error) {
if (prev)
prev->string.length = length;
else
done_dom_string(&dest);
return DOM_STACK_CODE_ERROR_MEM_ALLOC;
}
if (prev) {
copy_struct(&prev->string, &dest);
if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
&& node->type != DOM_NODE_ENTITY_REFERENCE) {
/* XXX: Ignore errors since we want to always
* free the appended node at this point. */
normalize_text_node_whitespace(prev);
}
return DOM_STACK_CODE_FREE_NODE;
} else {
int was_cdata_section = node->type == DOM_NODE_CDATA_SECTION;
node->type = DOM_NODE_TEXT;
memset(&node->data, 0, sizeof(node->data));
node->data.text.allocated = 1;
copy_struct(&node->string, &dest);
if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
&& was_cdata_section) {
/* XXX: Ignore errors since we want to always ok the
* append. */
normalize_text_node_whitespace(node);
}
return DOM_STACK_CODE_OK;
}
}
/* Pop callback of the normalizer context, invoked for every node type.
 *
 * Looks at the configuration flags stored as the context data and decides
 * what happens to @node: keep it (DOM_STACK_CODE_OK), have it freed
 * (DOM_STACK_CODE_FREE_NODE), or fold its text into a neighbouring Text node
 * via append_node_text(), whose result code is then returned. When the
 * document node is popped the configuration itself is freed. */
static enum dom_stack_code
dom_normalize_node_end(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_config *config = stack->current->data;
	int drop_unknown = (config->flags & DOM_CONFIG_UNKNOWN) != 0;

	switch (node->type) {
	case DOM_NODE_ELEMENT:
		/* Drop elements that are not known from the built-in node
		 * info. */
		if (drop_unknown && !node->data.element.type)
			return DOM_STACK_CODE_FREE_NODE;
		break;

	case DOM_NODE_ATTRIBUTE:
		/* Drop attributes that are not known from the built-in node
		 * info. */
		if (drop_unknown && !node->data.attribute.type)
			return DOM_STACK_CODE_FREE_NODE;
		break;

	case DOM_NODE_PROCESSING_INSTRUCTION:
		/* Drop processing instructions that are not known from the
		 * built-in node info. */
		if (drop_unknown && !node->data.proc_instruction.type)
			return DOM_STACK_CODE_FREE_NODE;
		break;

	case DOM_NODE_TEXT:
		if (config->flags & DOM_CONFIG_NORMALIZE_CHARACTERS)
			return append_node_text(config, node);

		/* Discard all Text nodes that contain only whitespace in
		 * element content. */
		if (!(config->flags & DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE)
		    && node->data.text.only_space)
			return DOM_STACK_CODE_FREE_NODE;
		break;

	case DOM_NODE_COMMENT:
		/* Discard all comments. */
		if (!(config->flags & DOM_CONFIG_COMMENTS))
			return DOM_STACK_CODE_FREE_NODE;
		break;

	case DOM_NODE_CDATA_SECTION:
		/* Transform CDATASection nodes into Text nodes. The new Text
		 * node is then combined with any adjacent Text node. */
		if (!(config->flags & DOM_CONFIG_CDATA_SECTIONS))
			return append_node_text(config, node);
		break;

	case DOM_NODE_ENTITY_REFERENCE:
		/* Remove all EntityReference nodes from the document,
		 * putting the entity expansions directly in their place. Text
		 * nodes are normalized. Only unexpanded entity references are
		 * kept in the document. */
		if (!(config->flags & DOM_CONFIG_ENTITIES))
			return append_node_text(config, node);
		break;

	case DOM_NODE_DOCUMENT:
		/* The document is done; release the configuration. */
		mem_free(config);
		break;

	default:
		break;
	}

	return DOM_STACK_CODE_OK;
}
/* Push callback for Text nodes in the normalizer context: when whitespace
 * normalization is enabled in the configuration, collapse the whitespace of
 * the node's text and return the result of that operation; otherwise leave
 * the node alone and return DOM_STACK_CODE_OK. */
enum dom_stack_code
dom_normalize_text(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_config *config = stack->current->data;

	if (!(config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE))
		return DOM_STACK_CODE_OK;

	/* Normalize whitespace in the text. */
	return normalize_text_node_whitespace(node);
}
/* Stack context description for the DOM normalizer. No per-state object is
 * requested (object size 0). On push only Text nodes are handled, by
 * dom_normalize_text(); on pop every node type is routed through
 * dom_normalize_node_end(). The entries are positional and follow the order
 * given by the per-entry comments. */
static struct dom_stack_context_info dom_config_normalizer_context = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_normalize_text,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_normalize_node_end,
/* DOM_NODE_ATTRIBUTE */ dom_normalize_node_end,
/* DOM_NODE_TEXT */ dom_normalize_node_end,
/* DOM_NODE_CDATA_SECTION */ dom_normalize_node_end,
/* DOM_NODE_ENTITY_REFERENCE */ dom_normalize_node_end,
/* DOM_NODE_ENTITY */ dom_normalize_node_end,
/* DOM_NODE_PROC_INSTRUCTION */ dom_normalize_node_end,
/* DOM_NODE_COMMENT */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT_TYPE */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT_FRAGMENT */ dom_normalize_node_end,
/* DOM_NODE_NOTATION */ dom_normalize_node_end,
}
};
/* Allocate a zeroed DOM configuration carrying @flags and register the
 * normalizer context on @stack with that configuration as its data.
 *
 * Returns the new configuration, or NULL if either the allocation or the
 * context registration failed (nothing is leaked in that case). */
struct dom_config *
add_dom_config_normalizer(struct dom_stack *stack, enum dom_config_flag flags)
{
	struct dom_config *config = mem_calloc(1, sizeof(*config));

	if (config) {
		config->flags = flags;

		if (!add_dom_stack_context(stack, config, &dom_config_normalizer_context)) {
			/* Registration failed; do not leak the config. */
			mem_free(config);
			config = NULL;
		}
	}

	return config;
}

90
src/dom/configuration.h Normal file
View File

@ -0,0 +1,90 @@
#ifndef EL__DOM_CONFIGURATION_H
#define EL__DOM_CONFIGURATION_H
struct dom_node;
struct dom_stack;
/* API Doc :: dom-config */
/** DOM Configuration
*
* The DOMConfiguration interface represents the configuration of a document.
* Using the configuration, it is possible to change the behaviour of how
* document normalization is done, such as replacing the CDATASection nodes
* with Text nodes.
*
* Note: Parameters are similar to features and properties used in SAX2 [SAX].
*
* The following list of parameters defined in the DOM: */
enum dom_config_flag {
/** "cdata-sections"
*
* The default is true and will keep CDATASection nodes in the
* document. When false, CDATASection nodes in the document are
* transformed into Text nodes. The new Text node is then combined with
* any adjacent Text node. */
DOM_CONFIG_CDATA_SECTIONS = 1,
/** "comments"
*
* If true (the default) keep Comment nodes in the document, else
* discard them. */
DOM_CONFIG_COMMENTS = 2,
/** "element-content-whitespace"
*
* The default is true and will keep all whitespaces in the document.
* When false, discard all Text nodes that contain only whitespaces. */
DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE = 4,
/** "entities"
*
* When true (the default) keep EntityReference nodes in the document.
* When false, remove all EntityReference nodes from the document,
* putting the entity expansions directly in their place. Text nodes
* are normalized. Only unexpanded entity references are kept in the
* document. Note: This parameter does not affect Entity nodes. */
DOM_CONFIG_ENTITIES = 8,
/** "normalize-characters"
*
* The default is false, not to perform character normalization, else
* fully normalize the characters in the document as defined in
* appendix B of [XML 1.1].
*
* NOTE(review): the normalizer currently reacts to this flag by
* appending a Text node to a preceding Text sibling; confirm whether
* real character normalization is intended. */
DOM_CONFIG_NORMALIZE_CHARACTERS = 16,
/** "unknown"
*
* If false (default) nothing is done, else elements, attributes and
* processing instructions that are not known according to the built-in
* node info are discarded. */
DOM_CONFIG_UNKNOWN = 32,
/** "normalize-whitespace"
*
* If false (default) nothing is done, else every run of whitespace
* characters in Text nodes is collapsed into a single space. */
DOM_CONFIG_NORMALIZE_WHITESPACE = 64,
};
struct dom_error;
/* Configuration of a DOM normalizer. An instance is allocated by
 * add_dom_config_normalizer() and attached to the stack as the data of the
 * normalizer context. */
struct dom_config {
enum dom_config_flag flags; /*: DOM configuration flags. */
/** FIXME: "error-handler"
*
* Contains an error handler. If an error is encountered in the
* document, this handler is called. When called, DOMError.relatedData
* will contain the closest node to where the error occurred. If the
* implementation is unable to determine the node where the error
* occurs, DOMError.relatedData will contain the Document node.
*
* NOTE(review): no call site for this handler is visible next to the
* normalizer; presumably not wired up yet -- confirm.
*/
void (*error_handler)(struct dom_config *, struct dom_error *);
};
/* Adds a normalizer context configured with @flags to @stack.
 * Returns the allocated configuration, or NULL on failure. */
struct dom_config *
add_dom_config_normalizer(struct dom_stack *stack, enum dom_config_flag flags);
#endif

View File

@ -225,16 +225,15 @@ get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type,
return dom_node_list_bsearch(&search, list);
}
int
get_dom_node_list_index(struct dom_node *parent, struct dom_node *node)
static int
get_dom_node_list_pos(struct dom_node_list *list, struct dom_node *node)
{
struct dom_node_list **list = get_dom_node_list(parent, node);
struct dom_node *entry;
int i;
if (!list) return -1;
assert(list);
foreach_dom_node (*list, entry, i) {
foreach_dom_node (list, entry, i) {
if (entry == node)
return i;
}
@ -242,6 +241,74 @@ get_dom_node_list_index(struct dom_node *parent, struct dom_node *node)
return -1;
}
/* Return the position of @node in the list of @parent that holds nodes of
 * @node's type, or -1 if @parent has no such list or @node is not in it. */
int
get_dom_node_list_index(struct dom_node *parent, struct dom_node *node)
{
	struct dom_node_list **list = get_dom_node_list(parent, node);

	if (!list)
		return -1;

	return get_dom_node_list_pos(*list, node);
}
/* Return the sibling stored immediately before @node in its parent's list,
 * or NULL when @node is the first entry, is not found, or the parent has no
 * list for this kind of node. @node must have a parent. */
struct dom_node *
get_dom_node_prev(struct dom_node *node)
{
	struct dom_node_list **list;
	int pos;

	assert(node->parent);

	list = get_dom_node_list(node->parent, node);
	if (!list)
		return NULL;

	pos = get_dom_node_list_pos(*list, node);

	/* Position 0 has no predecessor; -1 means the node was not found. */
	return pos > 0 ? (*list)->entries[pos - 1] : NULL;
}
/* Find the first child of @parent with node type @type.
 *
 * When @subtype is non-zero and @type is an element, attribute or processing
 * instruction, the child's own subtype must also equal @subtype. For every
 * other node type, or when @subtype is 0, the first child of the requested
 * node type is returned.
 *
 * Returns the matching node, or NULL if @parent cannot hold nodes of @type,
 * has no nodes of that kind, or no child matches. */
struct dom_node *
get_dom_node_child(struct dom_node *parent, enum dom_node_type type,
		   int16_t subtype)
{
	struct dom_node_list **list;
	struct dom_node *node;
	int index;

	list = get_dom_node_list_by_type(parent, type);
	/* The list slot may exist while the list itself is still NULL
	 * (e.g. an element without children); there is nothing to search
	 * in that case. */
	if (!list || !*list) return NULL;

	foreach_dom_node (*list, node, index) {
		if (node->type != type)
			continue;

		if (!subtype) return node;

		switch (type) {
		case DOM_NODE_ELEMENT:
			if (node->data.element.type == subtype)
				return node;
			break;

		case DOM_NODE_ATTRIBUTE:
			if (node->data.attribute.type == subtype)
				return node;
			break;

		case DOM_NODE_PROCESSING_INSTRUCTION:
			/* Compare against the processing instruction's own
			 * type field, not the attribute member of the
			 * union. */
			if (node->data.proc_instruction.type == subtype)
				return node;
			break;

		default:
			/* Other node types carry no subtype to compare. */
			return node;
		}
	}

	return NULL;
}
/* Nodes */
struct dom_node *

View File

@ -243,6 +243,14 @@ int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
* @list is already sorted properly. */
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
/* Returns the previous sibling to the node. */
struct dom_node *get_dom_node_prev(struct dom_node *node);
/* Returns the first child of @node that has the node type @child_type and,
 * if @child_subtype is non-zero, also the given element, attribute or
 * processing instruction subtype. Returns NULL if there is no such child. */
struct dom_node *
get_dom_node_child(struct dom_node *node, enum dom_node_type child_type,
int16_t child_subtype);
/* Looks up the @node_map for a node matching the requested type and name.
* The @subtype may be 0, indicating an unknown subtype, so only the name should be
* tested else it will indicate either the element or attribute private
@ -304,17 +312,17 @@ struct dom_string *get_dom_node_value(struct dom_node *node);
/* Returns the name used for identifying the node type. */
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
/* Based on the type of the parent and the node return a proper list
/* Based on the type of the parent and the node type return a proper list
* or NULL. This is useful when adding a node to a parent node. */
static inline struct dom_node_list **
get_dom_node_list(struct dom_node *parent, struct dom_node *node)
get_dom_node_list_by_type(struct dom_node *parent, enum dom_node_type type)
{
switch (parent->type) {
case DOM_NODE_DOCUMENT:
return &parent->data.document.children;
case DOM_NODE_ELEMENT:
switch (node->type) {
switch (type) {
case DOM_NODE_ATTRIBUTE:
return &parent->data.element.map;
@ -323,7 +331,7 @@ get_dom_node_list(struct dom_node *parent, struct dom_node *node)
}
case DOM_NODE_DOCUMENT_TYPE:
switch (node->type) {
switch (type) {
case DOM_NODE_ENTITY:
return &parent->data.document_type.entities;
@ -335,7 +343,7 @@ get_dom_node_list(struct dom_node *parent, struct dom_node *node)
}
case DOM_NODE_PROCESSING_INSTRUCTION:
switch (node->type) {
switch (type) {
case DOM_NODE_ATTRIBUTE:
return &parent->data.proc_instruction.map;
@ -348,4 +356,7 @@ get_dom_node_list(struct dom_node *parent, struct dom_node *node)
}
}
/* Convenience wrapper around get_dom_node_list_by_type() that takes the
 * node type from an existing @node. */
#define get_dom_node_list(parent, node) \
get_dom_node_list_by_type(parent, (node)->type)
#endif

View File

@ -506,7 +506,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
select->selector = select_node;
}
if (!push_dom_node(stack, &select_node->node))
if (push_dom_node(stack, &select_node->node) != DOM_STACK_CODE_OK)
return DOM_ERR_INVALID_STATE;
if (select_node->node.type != DOM_NODE_ELEMENT)
@ -897,7 +897,7 @@ match_element_selector(struct dom_select_node *selector, struct dom_node *node,
#define get_dom_select_data(stack) ((stack)->current->data)
/* Matches an element node being visited against the current selector stack. */
static void
enum dom_stack_code
dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_select_data *select_data = get_dom_select_data(stack);
@ -921,11 +921,13 @@ dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *da
if (selector)
push_dom_node(&select_data->stack, &selector->node);
}
return DOM_STACK_CODE_OK;
}
/* Ensures that nodes, no longer 'reachable' on the stack do not have any
* states associated with them on the select data stack. */
static void
enum dom_stack_code
dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_select_data *select_data = get_dom_select_data(stack);
@ -944,12 +946,14 @@ dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *dat
continue;
}
}
return DOM_STACK_CODE_OK;
}
/* For now this is only for matching the ':contains(<string>)' pseudo-class.
* Any node which can contain text and thus characters from the given <string>
* are handled in this common callback. */
static void
enum dom_stack_code
dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_select_data *select_data = get_dom_select_data(stack);
@ -961,7 +965,7 @@ dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
WDBG("Text node: %d chars", node->string.length);
if (!text_sel)
return;
return DOM_STACK_CODE_OK;
text = &text_sel->node.string;
@ -973,6 +977,8 @@ dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
default:
ERROR("Unhandled type");
}
return DOM_STACK_CODE_OK;
}
/* Context info for interacting with the DOM tree or stream stack. */
@ -1070,7 +1076,7 @@ select_dom_nodes(struct dom_select *select, struct dom_node *root)
&dom_select_data_context_info);
add_dom_stack_tracer(&select_data.stack, "select-match: ");
if (push_dom_node(&select_data.stack, &select->selector->node)) {
if (push_dom_node(&select_data.stack, &select->selector->node) == DOM_STACK_CODE_OK) {
get_dom_stack_top(&select_data.stack)->immutable = 1;
walk_dom_nodes(&stack, root);
}

View File

@ -39,7 +39,10 @@ add_sgml_document(struct dom_stack *stack, struct dom_string *string)
{
struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string);
return node ? push_dom_node(stack, node) : NULL;
if (node && push_dom_node(stack, node) == DOM_STACK_CODE_OK)
return node;
return NULL;
}
static inline struct dom_node *
@ -58,7 +61,7 @@ add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
node_info = get_sgml_node_info(parser->info->elements, node);
node->data.element.type = node_info->type;
if (!push_dom_node(stack, node))
if (push_dom_node(stack, node) != DOM_STACK_CODE_OK)
return NULL;
state = get_dom_stack_top(stack);
@ -92,7 +95,7 @@ add_sgml_attribute(struct dom_stack *stack,
if (valtoken && valtoken->type == SGML_TOKEN_STRING)
node->data.attribute.quoted = 1;
if (!node || !push_dom_node(stack, node))
if (!node || push_dom_node(stack, node) != DOM_STACK_CODE_OK)
return;
pop_dom_node(stack);
@ -119,7 +122,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar
node->data.proc_instruction.type = DOM_PROC_INSTRUCTION;
}
return push_dom_node(stack, node);
if (push_dom_node(stack, node) == DOM_STACK_CODE_OK)
return node;
return NULL;
}
static inline void
@ -133,7 +139,7 @@ add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scann
if (token->type == SGML_TOKEN_SPACE)
node->data.text.only_space = 1;
if (push_dom_node(stack, node))
if (push_dom_node(stack, node) == DOM_STACK_CODE_OK)
pop_dom_node(stack);
}
@ -403,7 +409,7 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
}
node = init_dom_node(DOM_NODE_TEXT, &source);
if (!node || !push_dom_node(&parser->parsing, node))
if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK)
return SGML_PARSER_CODE_MEM_ALLOC;
pop_dom_node(&parser->parsing);
@ -426,7 +432,7 @@ struct sgml_parsing_state {
size_t depth;
};
static void
enum dom_stack_code
sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct sgml_parser *parser = get_sgml_parser(stack);
@ -442,9 +448,11 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
SGML_STATE_TEXT, count_lines, complete, incremental,
detect_errors);
parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct sgml_parser *parser = get_sgml_parser(stack);
@ -458,6 +466,8 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
}
assert(parsing->depth == parser->stack.depth);
return DOM_STACK_CODE_OK;
}
static struct dom_stack_context_info sgml_parsing_context_info = {

View File

@ -129,10 +129,12 @@ enum dom_stack_action {
DOM_STACK_POP,
};
static void
/* Returns whether the node should be freed with done_dom_node(). */
static int
call_dom_stack_callbacks(struct dom_stack *stack, struct dom_stack_state *state,
enum dom_stack_action action)
{
int free_node = 0;
int i;
for (i = 0; i < stack->contexts_size; i++) {
@ -148,13 +150,21 @@ call_dom_stack_callbacks(struct dom_stack *stack, struct dom_stack_state *state,
void *data = get_dom_stack_state_data(context, state);
stack->current = context;
callback(stack, state->node, data);
switch (callback(stack, state->node, data)) {
case DOM_STACK_CODE_FREE_NODE:
free_node = 1;
break;
default:
break;
}
stack->current = NULL;
}
}
return free_node;
}
struct dom_node *
enum dom_stack_code
push_dom_node(struct dom_stack *stack, struct dom_node *node)
{
struct dom_stack_state *state;
@ -164,13 +174,13 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
assert(0 < node->type && node->type < DOM_NODES);
if (stack->depth > DOM_STACK_MAX_DEPTH) {
return NULL;
return DOM_STACK_CODE_ERROR_MAX_DEPTH;
}
state = realloc_dom_stack_states(&stack->states, stack->depth);
if (!state) {
done_dom_node(node);
return NULL;
return DOM_STACK_CODE_ERROR_MEM_ALLOC;
}
state += stack->depth;
@ -181,7 +191,7 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
if (context->info->object_size
&& !realloc_dom_stack_state_objects(context, stack->depth)) {
done_dom_node(node);
return NULL;
return DOM_STACK_CODE_ERROR_MEM_ALLOC;
}
}
@ -193,7 +203,7 @@ push_dom_node(struct dom_stack *stack, struct dom_node *node)
stack->depth++;
call_dom_stack_callbacks(stack, state, DOM_STACK_PUSH);
return node;
return DOM_STACK_CODE_OK;
}
void
@ -211,9 +221,8 @@ pop_dom_node(struct dom_stack *stack)
if (state->immutable)
return;
call_dom_stack_callbacks(stack, state, DOM_STACK_POP);
if (stack->flags & DOM_STACK_FLAG_FREE_NODES)
if (call_dom_stack_callbacks(stack, state, DOM_STACK_POP)
|| (stack->flags & DOM_STACK_FLAG_FREE_NODES))
done_dom_node(state->node);
stack->depth--;
@ -349,7 +358,8 @@ walk_dom_nodes(struct dom_stack *stack, struct dom_node *root)
if (!context)
return;
push_dom_node(stack, root);
if (push_dom_node(stack, root) != DOM_STACK_CODE_OK)
return;
while (!dom_stack_is_empty(stack)) {
struct dom_stack_state *state = get_dom_stack_top(stack);
@ -488,7 +498,7 @@ static unsigned char indent_string[] =
#define get_indent_offset(stack) \
((stack)->depth < sizeof(indent_string)/2 ? (stack)->depth * 2 : sizeof(indent_string))
static void
enum dom_stack_code
dom_stack_trace_tree(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_string *value = &node->string;
@ -499,9 +509,11 @@ dom_stack_trace_tree(struct dom_stack *stack, struct dom_node *node, void *data)
get_indent_offset(stack), indent_string,
name->length, name->string,
value->length, value->string);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
dom_stack_trace_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_string value;
@ -522,9 +534,11 @@ dom_stack_trace_id_leaf(struct dom_stack *stack, struct dom_node *node, void *da
if (is_dom_string_set(&value))
done_dom_string(&value);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
dom_stack_trace_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_string *name;
@ -543,9 +557,11 @@ dom_stack_trace_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
if (is_dom_string_set(&value))
done_dom_string(&value);
return DOM_STACK_CODE_OK;
}
static void
enum dom_stack_code
dom_stack_trace_branch(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_string *name;
@ -560,6 +576,8 @@ dom_stack_trace_branch(struct dom_stack *stack, struct dom_node *node, void *dat
empty_string_or_(stack->current->data),
get_indent_offset(stack), indent_string,
id->length, id->string, name->length, name->string);
return DOM_STACK_CODE_OK;
}
struct dom_stack_context_info dom_stack_trace_context_info = {

View File

@ -9,10 +9,22 @@ struct dom_stack;
/* API Doc :: dom-stack */
/** DOM stack code
*
* Codes used by the DOM stack to indicate states. They are returned by
* push_dom_node() and by the stack context callbacks. A pop callback can
* return DOM_STACK_CODE_FREE_NODE to have the popped node released with
* done_dom_node().
*/
enum dom_stack_code {
DOM_STACK_CODE_OK, /*: All is well */
DOM_STACK_CODE_FREE_NODE, /*: Free the (popped) node */
DOM_STACK_CODE_ERROR_MEM_ALLOC, /*: Memory allocation failure */
DOM_STACK_CODE_ERROR_MAX_DEPTH, /*: Stack max depth reached */
};
/** DOM stack callback
*
* Used by contexts for 'hooking' into the node traversal. The returned code
* tells the stack how to proceed, e.g. whether to free the node. */
typedef void (*dom_stack_callback_T)(struct dom_stack *, struct dom_node *, void *);
typedef enum dom_stack_code
(*dom_stack_callback_T)(struct dom_stack *, struct dom_node *, void *);
#define DOM_STACK_MAX_DEPTH 4096
@ -230,7 +242,7 @@ void done_dom_stack_context(struct dom_stack *stack, struct dom_stack_context *c
*
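* If a memory allocation fails the node is released with ref:[done_dom_node]
* and DOM_STACK_CODE_ERROR_MEM_ALLOC is returned. If the stack is too deep
* DOM_STACK_CODE_ERROR_MAX_DEPTH is returned. Else DOM_STACK_CODE_OK is
* returned. */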
struct dom_node *push_dom_node(struct dom_stack *stack, struct dom_node *node);
enum dom_stack_code push_dom_node(struct dom_stack *stack, struct dom_node *node);
/** Pop the top stack state
*