1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

Add test for incremental SGML parsing

It is a loop that parses the same small document with various read sizes.
The sgml-parser program is updated to take a --stdin option, which takes the
read size as a required parameter.
This commit is contained in:
Jonas Fonseca 2006-01-27 07:49:15 +01:00 committed by Jonas Fonseca
parent b25cd27232
commit 0f8aa77ebb
2 changed files with 104 additions and 7 deletions

View File

@ -266,6 +266,7 @@ main(int argc, char *argv[])
enum dom_config_flag normalize_flags = 0; enum dom_config_flag normalize_flags = 0;
int normalize = 0; int normalize = 0;
int complete = 1; int complete = 1;
size_t read_stdin = 0;
struct dom_string uri = INIT_DOM_STRING("dom://test", -1); struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
struct dom_string source = INIT_DOM_STRING("(no source)", -1); struct dom_string source = INIT_DOM_STRING("(no source)", -1);
int i; int i;
@ -302,6 +303,20 @@ main(int argc, char *argv[])
set_dom_string(&source, argv[i], strlen(argv[i])); set_dom_string(&source, argv[i], strlen(argv[i]));
} }
} else if (!strncmp(arg, "stdin", 5)) {
arg += 5;
if (*arg == '=') {
arg++;
read_stdin = atoi(arg);
set_dom_string(&source, arg, strlen(arg));
} else {
i++;
if (i >= argc)
die("--stdin expects a number");
read_stdin = atoi(argv[i]);
}
flags |= SGML_PARSER_INCREMENTAL;
} else if (!strncmp(arg, "normalize", 9)) { } else if (!strncmp(arg, "normalize", 9)) {
arg += 9; arg += 9;
if (*arg == '=') { if (*arg == '=') {
@ -343,16 +358,49 @@ main(int argc, char *argv[])
else else
add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info); add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
code = parse_sgml(parser, source.string, source.length, complete); if (read_stdin > 0) {
unsigned char *buffer;
buffer = mem_alloc(read_stdin);
if (!buffer)
die("Cannot allocate buffer");
complete = 0;
while (!complete) {
size_t size = fread(buffer, 1, read_stdin, stdin);
if (ferror(stdin))
die("error reading from stdin");
complete = feof(stdin);
code = parse_sgml(parser, buffer, size, complete);
switch (code) {
case SGML_PARSER_CODE_OK:
break;
case SGML_PARSER_CODE_INCOMPLETE:
if (!complete) break;
/* Error */
default:
complete = 1;
}
}
mem_free(buffer);
} else {
code = parse_sgml(parser, source.string, source.length, complete);
}
if (parser->root) { if (parser->root) {
size_t root_offset = parser->stack.depth - 1; assert(!complete || parser->stack.depth > 0);
assert(!complete || root_offset == 0); while (!dom_stack_is_empty(&parser->stack)) {
get_dom_stack_top(&parser->stack)->immutable = 0;
get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
while (!dom_stack_is_empty(&parser->stack))
pop_dom_node(&parser->stack); pop_dom_node(&parser->stack);
}
if (normalize) { if (normalize) {
struct dom_stack stack; struct dom_stack stack;

View File

@ -0,0 +1,49 @@
#!/bin/sh
#
# Copyright (c) 2005 Jonas Fonseca
#
test_description='Test incremental parsing of SGML documents.
This test checks if the SGML parser correctly recovers during incremental
parsing.
'
. "$TEST_LIB"
# test_output_equals <desc> <size> <src> <out>
#
# Feed <src> to sgml-parser on stdin using "--stdin <size>", then register a
# test that compares the filtered parser output with the expected text.
#
#   $1 desc - test description; also the basis of the generated --uri value
#   $2 size - value handed to the parser's --stdin option (the read size)
#   $3 src  - document text echoed into the parser
#   $4 out  - expected output; its first line is discarded before comparing
#
# Writes the files "output" and "expected" in the current directory.
# NOTE(review): test_expect_success is not defined here; presumably it comes
# from the library sourced via $TEST_LIB -- confirm.
test_output_equals () {
desc="$1"; shift
size="$1"; shift
src="$1"; shift
out="$1"; shift
# Derive a "test:" URI from the description. The sed script, in order:
# removes a leading "[...]" tag and the blanks around it, replaces runs of
# ':', '.', ',' and blanks with a single '-', turns '_' into '-', lowercases
# ASCII letters, and finally drops anything that is not alphanumeric or '-'.
# NOTE(review): "\t" inside a bracket expression is only a tab in some sed
# implementations (e.g. GNU sed) -- confirm this is acceptable for the
# platforms the tests run on.
URI="test:$(echo "$desc" | sed '
s/^[ \t]*\[[^]]*\][ \t]*//;
s/[:., \t][:., \t]*/-/g;
s/_/-/g;
# *cough*
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
s/[^a-zA-Z0-9-]//g;')"
# Run the parser on the document, strip the leading blank from each output
# line, and delete the last output line before saving the result.
echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
| sed -e 's/^ //' | sed -n '$d;p' > output
# The expected file starts with the "#document:" line for the generated URI,
# followed by <out> from its second line onwards (callers pass a literal that
# begins with a newline, so the first line is empty).
echo "#document: $URI" > expected
echo "$out" | sed -n '2,$p' >> expected
test_expect_success "$desc" 'cmp output expected'
}
# Parse the same small HTML document once per read size, from 25 bytes down
# to a single byte at a time. Every run must produce the same node listing.
# The expected-output literal deliberately begins with a newline; that empty
# first line is dropped by test_output_equals.
for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
test_output_equals \
"Incrementally parse a small document reading $i bytes at a time." \
"$i" \
'<html><body><p>Hello World!</p></body></html>' \
'
element: html
element: body
element: p
#text: Hello World!'
done
# NOTE(review): test_done is not defined in this file; presumably provided by
# the library sourced via $TEST_LIB to finish the test run -- confirm.
test_done