diff --git a/src/dom/configuration.h b/src/dom/configuration.h
index 0436ba5ae..f323c386f 100644
--- a/src/dom/configuration.h
+++ b/src/dom/configuration.h
@@ -63,8 +63,9 @@ enum dom_config_flag {
/** "normalize-whitespace"
*
- * If false (default) nothing is done, else all nodes are discarded
- * once they have been traversed. */
+ * If false (default) nothing is done, else all text nodes are
+ * normalized so that each sequence of space characters is
+ * collapsed into a single space. */
DOM_CONFIG_NORMALIZE_WHITESPACE = 64,
};
diff --git a/src/dom/stack.c b/src/dom/stack.c
index 61bd75a68..328a7ef4c 100644
--- a/src/dom/stack.c
+++ b/src/dom/stack.c
@@ -420,7 +420,7 @@ walk_dom_nodes(struct dom_stack *stack, struct dom_node *root)
if (is_dom_node_list_member(list, wstate->index)) {
struct dom_node *child = list->entries[wstate->index++];
- if (push_dom_node(stack, child))
+ if (push_dom_node(stack, child) == DOM_STACK_CODE_OK)
continue;
}
diff --git a/src/dom/test/sgml-parser.c b/src/dom/test/sgml-parser.c
index 28c5bd6c8..0032db857 100644
--- a/src/dom/test/sgml-parser.c
+++ b/src/dom/test/sgml-parser.c
@@ -11,6 +11,7 @@
#include "elinks.h"
+#include "dom/configuration.h"
#include "dom/node.h"
#include "dom/sgml/parser.h"
#include "dom/stack.h"
@@ -260,7 +261,10 @@ main(int argc, char *argv[])
struct sgml_parser *parser;
enum sgml_document_type doctype = SGML_DOCTYPE_HTML;
enum sgml_parser_flag flags = 0;
+ enum sgml_parser_type type = SGML_PARSER_STREAM;
enum sgml_parser_code code = 0;
+ enum dom_config_flag normalize_flags = 0;
+ int normalize = 0;
int complete = 1;
struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
struct dom_string source = INIT_DOM_STRING("(no source)", -1);
@@ -298,6 +302,20 @@ main(int argc, char *argv[])
set_dom_string(&source, argv[i], strlen(argv[i]));
}
+ } else if (!strncmp(arg, "normalize", 9)) {
+ arg += 9;
+ if (*arg == '=') {
+ arg++;
+ } else {
+ i++;
+ if (i >= argc)
+ die("--normalize expects a string");
+ arg = argv[i];
+ }
+ normalize = 1;
+ normalize_flags = parse_dom_config(arg, ',');
+ type = SGML_PARSER_TREE;
+
} else if (!strcmp(arg, "print-lines")) {
flags |= SGML_PARSER_COUNT_LINES;
@@ -316,11 +334,14 @@ main(int argc, char *argv[])
}
}
- parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri, flags);
+ parser = init_sgml_parser(type, doctype, &uri, flags);
if (!parser) return 1;
parser->error_func = sgml_error_function;
- add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
+ if (normalize)
+ add_dom_config_normalizer(&parser->stack, normalize_flags);
+ else
+ add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
code = parse_sgml(parser, source.string, source.length, complete);
if (parser->root) {
@@ -330,13 +351,30 @@ main(int argc, char *argv[])
get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
- /* For SGML_PARSER_STREAM this will free the DOM
- * root node. */
while (!dom_stack_is_empty(&parser->stack))
pop_dom_node(&parser->stack);
+
+ if (normalize) {
+ struct dom_stack stack;
+
+ /* Note that we cannot free nodes while walking the DOM
+ * tree, since walk_dom_nodes() uses an index to traverse
+ * the tree. */
+ init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
+ /* XXX: This context needs to be added first because it
+ * assumes the parser can be accessed via
+ * stack->contexts[0].data. */
+ add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info);
+ walk_dom_nodes(&stack, parser->root);
+ done_dom_stack(&stack);
+ done_dom_node(parser->root);
+ }
}
done_sgml_parser(parser);
+#ifdef DEBUG_MEMLEAK
+ check_memory_leaks();
+#endif
return code;
}
diff --git a/src/dom/test/test-dom-configuration-basic b/src/dom/test/test-dom-configuration-basic
new file mode 100755
index 000000000..cd217b1ef
--- /dev/null
+++ b/src/dom/test/test-dom-configuration-basic
@@ -0,0 +1,236 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Jonas Fonseca
+#
+
+test_description='Test the DOM configuration module
+
+This test checks that the normalization performed by the DOM configuration
+is done correctly.
+'
+
+. "$TEST_LIB"
+
+# Usage: test_normalize_output_equals <desc> <config> <source> <expected>
+# Feeds <source> to sgml-parser --normalize <config>; line 1 of <expected> is skipped.
+test_normalize_output_equals () {
+ desc="$1"; shift
+ config="$1"; shift
+ src="$1"; shift
+ out="$1"; shift
+ URI="test:$(echo "$desc" | sed '
+ s/^[[:blank:]]*\[[^]]*\][[:blank:]]*//;
+ s/[[:blank:]:.,][[:blank:]:.,]*/-/g;
+ s/_/-/g;
+ # Lowercase the slug; y/// takes explicit character lists, not ranges.
+ y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
+ s/[^a-zA-Z0-9-]//g;')"
+
+ sgml-parser --normalize "$config" --uri "$URI" --src "$src" | sed 's/^ //' > output
+ echo "#document: $URI" > expected
+ echo "$out" | sed -n '2,$p' >> expected
+ test_expect_success "$desc" 'cmp output expected'
+}
+
+
+## Config strings ###########################################################
+# Comma-separated config flag names handed to sgml-parser --normalize.
+NOOP='cdata-sections,comments,element-content-whitespace,entities'
+NOCOMMENTS='cdata-sections,element-content-whitespace,entities'
+CDATA2TEXT='comments,element-content-whitespace,entities'
+ENTITIES='cdata-sections,comments,element-content-whitespace'
+NOWSTEXT='cdata-sections,comments,entities'
+NORM1=''
+
+
+## No-ops ###################################################################
+
+test_normalize_output_equals \
+'Normalization no-op.' \
+"$NOOP" \
+'