Add test for incremental SGML parsing

The test is a loop that parses the same small document with various read sizes. The sgml-parser program is updated to accept a --stdin option that takes the read size as a required parameter.
2024-12-04 14:46:47 -05:00 · 2006-01-27 07:49:15 +01:00 · 2006-01-27 07:49:15 +01:00 · 0f8aa77ebb
commit 0f8aa77ebb
parent b25cd27232
2 changed files with 104 additions and 7 deletions
--- a/src/dom/test/sgml-parser.c
+++ b/src/dom/test/sgml-parser.c
@ -266,6 +266,7 @@ main(int argc, char *argv[])
 	enum dom_config_flag normalize_flags = 0;
 	int normalize = 0;
 	int complete = 1;
+	size_t read_stdin = 0;
 	struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
 	struct dom_string source = INIT_DOM_STRING("(no source)", -1);
 	int i;
@ -302,6 +303,20 @@ main(int argc, char *argv[])
 				set_dom_string(&source, argv[i], strlen(argv[i]));
 			}

+		} else if (!strncmp(arg, "stdin", 5)) {
+			arg += 5;
+			if (*arg == '=') {
+				arg++;
+				read_stdin = atoi(arg);
+				set_dom_string(&source, arg, strlen(arg));
+			} else {
+				i++;
+				if (i >= argc)
+					die("--stdin expects a number");
+				read_stdin = atoi(argv[i]);
+			}
+			flags |= SGML_PARSER_INCREMENTAL;
+
 		} else if (!strncmp(arg, "normalize", 9)) {
 			arg += 9;
 			if (*arg == '=') {
@ -343,16 +358,49 @@ main(int argc, char *argv[])
 	else
 		add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);

-	code = parse_sgml(parser, source.string, source.length, complete);
+	if (read_stdin > 0) {
+		unsigned char *buffer;
+
+		buffer = mem_alloc(read_stdin);
+		if (!buffer)
+			die("Cannot allocate buffer");
+
+		complete = 0;
+
+		while (!complete) {
+			size_t size = fread(buffer, 1, read_stdin, stdin);
+
+			if (ferror(stdin))
+				die("error reading from stdin");
+
+			complete = feof(stdin);
+
+			code = parse_sgml(parser, buffer, size, complete);
+			switch (code) {
+			case SGML_PARSER_CODE_OK:
+				break;
+
+			case SGML_PARSER_CODE_INCOMPLETE:
+				if (!complete) break;
+				/* Error */
+			default:
+				complete = 1;
+			}
+		}
+
+		mem_free(buffer);
+
+	} else {
+		code = parse_sgml(parser, source.string, source.length, complete);
+	}
+
 	if (parser->root) {
-		size_t root_offset = parser->stack.depth - 1;
+		assert(!complete || parser->stack.depth > 0);

-		assert(!complete || root_offset == 0);
-
-		get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
-
-		while (!dom_stack_is_empty(&parser->stack))
+		while (!dom_stack_is_empty(&parser->stack)) {
+			get_dom_stack_top(&parser->stack)->immutable = 0;
 			pop_dom_node(&parser->stack);
+		}

 		if (normalize) {
 			struct dom_stack stack;
--- a/src/dom/test/test-sgml-parser-incremental
+++ b/src/dom/test/test-sgml-parser-incremental
@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Jonas Fonseca
+#
+
+test_description='Test incremental parsing of SGML documents.
+
+This test checks if the SGML parser correctly recovers during incremental
+parsing.
+'
+
+. "$TEST_LIB"
+
+test_output_equals () {
+	desc="$1"; shift
+	size="$1"; shift
+	src="$1"; shift
+	out="$1"; shift
+
+	URI="test:$(echo "$desc" | sed '
+		s/^[ \t]*\[[^]]*\][ \t]*//;
+		s/[:., \t][:., \t]*/-/g;
+		s/_/-/g;
+		# *cough*
+		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
+		s/[^a-zA-Z0-9-]//g;')"
+
+	echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
+	| sed -e 's/^  //' | sed -n '$d;p' > output
+	echo "#document: $URI" > expected
+	echo "$out" | sed -n '2,$p' >> expected
+
+	test_expect_success "$desc" 'cmp output expected' 
+}
+
+for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
+	test_output_equals \
+	"Incrementally parse a small document reading $i bytes at a time." \
+	"$i" \
+	'<html><body><p>Hello World!</p></body></html>' \
+	'
+element: html
+  element: body
+    element: p
+      #text: Hello World!'
+
+done
+
+test_done