From 0f8aa77ebbb2d040739f0b4fcf416fa72762eb39 Mon Sep 17 00:00:00 2001 From: Jonas Fonseca Date: Fri, 27 Jan 2006 07:49:15 +0100 Subject: [PATCH] Add test for incremental SGML parsing It is a loop that parses the same small document with various read sizes. The sgml-parser program is updated to take a --stdin option, which takes the read size as a required parameter. --- src/dom/test/sgml-parser.c | 62 ++++++++++++++++++++--- src/dom/test/test-sgml-parser-incremental | 49 ++++++++++++++++++ 2 files changed, 104 insertions(+), 7 deletions(-) create mode 100755 src/dom/test/test-sgml-parser-incremental diff --git a/src/dom/test/sgml-parser.c b/src/dom/test/sgml-parser.c index 0032db857..320758a6b 100644 --- a/src/dom/test/sgml-parser.c +++ b/src/dom/test/sgml-parser.c @@ -266,6 +266,7 @@ main(int argc, char *argv[]) enum dom_config_flag normalize_flags = 0; int normalize = 0; int complete = 1; + size_t read_stdin = 0; struct dom_string uri = INIT_DOM_STRING("dom://test", -1); struct dom_string source = INIT_DOM_STRING("(no source)", -1); int i; @@ -302,6 +303,20 @@ main(int argc, char *argv[]) set_dom_string(&source, argv[i], strlen(argv[i])); } + } else if (!strncmp(arg, "stdin", 5)) { + arg += 5; + if (*arg == '=') { + arg++; + read_stdin = atoi(arg); + set_dom_string(&source, arg, strlen(arg)); + } else { + i++; + if (i >= argc) + die("--stdin expects a number"); + read_stdin = atoi(argv[i]); + } + flags |= SGML_PARSER_INCREMENTAL; + } else if (!strncmp(arg, "normalize", 9)) { arg += 9; if (*arg == '=') { @@ -343,16 +358,49 @@ main(int argc, char *argv[]) else add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info); - code = parse_sgml(parser, source.string, source.length, complete); + if (read_stdin > 0) { + unsigned char *buffer; + + buffer = mem_alloc(read_stdin); + if (!buffer) + die("Cannot allocate buffer"); + + complete = 0; + + while (!complete) { + size_t size = fread(buffer, 1, read_stdin, stdin); + + if (ferror(stdin)) + die("error reading 
from stdin"); + + complete = feof(stdin); + + code = parse_sgml(parser, buffer, size, complete); + switch (code) { + case SGML_PARSER_CODE_OK: + break; + + case SGML_PARSER_CODE_INCOMPLETE: + if (!complete) break; + /* Error */ + default: + complete = 1; + } + } + + mem_free(buffer); + + } else { + code = parse_sgml(parser, source.string, source.length, complete); + } + if (parser->root) { - size_t root_offset = parser->stack.depth - 1; + assert(!complete || parser->stack.depth > 0); - assert(!complete || root_offset == 0); - - get_dom_stack_state(&parser->stack, root_offset)->immutable = 0; - - while (!dom_stack_is_empty(&parser->stack)) + while (!dom_stack_is_empty(&parser->stack)) { + get_dom_stack_top(&parser->stack)->immutable = 0; pop_dom_node(&parser->stack); + } if (normalize) { struct dom_stack stack; diff --git a/src/dom/test/test-sgml-parser-incremental b/src/dom/test/test-sgml-parser-incremental new file mode 100755 index 000000000..a9896e52b --- /dev/null +++ b/src/dom/test/test-sgml-parser-incremental @@ -0,0 +1,49 @@ +#!/bin/sh +# +# Copyright (c) 2005 Jonas Fonseca +# + +test_description='Test incremental parsing of SGML documents. + +This test checks if the SGML parser correctly recovers during incremental +parsing. +' + +. "$TEST_LIB" + +test_output_equals () { + desc="$1"; shift + size="$1"; shift + src="$1"; shift + out="$1"; shift + + URI="test:$(echo "$desc" | sed ' + s/^[ \t]*\[[^]]*\][ \t]*//; + s/[:., \t][:., \t]*/-/g; + s/_/-/g; + # *cough* + y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/; + s/[^a-zA-Z0-9-]//g;')" + + echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \ + | sed -e 's/^ //' | sed -n '$d;p' > output + echo "#document: $URI" > expected + echo "$out" | sed -n '2,$p' >> expected + + test_expect_success "$desc" 'cmp output expected' +} + +for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do + test_output_equals \ + "Incrementally parse a small document reading $i bytes at a time." \ + "$i" \ + '

<html><body><p>Hello World!</p></body></html>

' \ + ' +element: html + element: body + element: p + #text: Hello World!' + +done + +test_done