mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Add test for incremental SGML parsing
It is a loop that parses the same small document with various read sizes. The sgml-parser program is updated to take a --stdin option that takes the read size as a required parameter.
This commit is contained in:
parent
b25cd27232
commit
0f8aa77ebb
@ -266,6 +266,7 @@ main(int argc, char *argv[])
|
||||
enum dom_config_flag normalize_flags = 0;
|
||||
int normalize = 0;
|
||||
int complete = 1;
|
||||
size_t read_stdin = 0;
|
||||
struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
|
||||
struct dom_string source = INIT_DOM_STRING("(no source)", -1);
|
||||
int i;
|
||||
@ -302,6 +303,20 @@ main(int argc, char *argv[])
|
||||
set_dom_string(&source, argv[i], strlen(argv[i]));
|
||||
}
|
||||
|
||||
} else if (!strncmp(arg, "stdin", 5)) {
|
||||
arg += 5;
|
||||
if (*arg == '=') {
|
||||
arg++;
|
||||
read_stdin = atoi(arg);
|
||||
set_dom_string(&source, arg, strlen(arg));
|
||||
} else {
|
||||
i++;
|
||||
if (i >= argc)
|
||||
die("--stdin expects a number");
|
||||
read_stdin = atoi(argv[i]);
|
||||
}
|
||||
flags |= SGML_PARSER_INCREMENTAL;
|
||||
|
||||
} else if (!strncmp(arg, "normalize", 9)) {
|
||||
arg += 9;
|
||||
if (*arg == '=') {
|
||||
@ -343,16 +358,49 @@ main(int argc, char *argv[])
|
||||
else
|
||||
add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
|
||||
|
||||
code = parse_sgml(parser, source.string, source.length, complete);
|
||||
if (read_stdin > 0) {
|
||||
unsigned char *buffer;
|
||||
|
||||
buffer = mem_alloc(read_stdin);
|
||||
if (!buffer)
|
||||
die("Cannot allocate buffer");
|
||||
|
||||
complete = 0;
|
||||
|
||||
while (!complete) {
|
||||
size_t size = fread(buffer, 1, read_stdin, stdin);
|
||||
|
||||
if (ferror(stdin))
|
||||
die("error reading from stdin");
|
||||
|
||||
complete = feof(stdin);
|
||||
|
||||
code = parse_sgml(parser, buffer, size, complete);
|
||||
switch (code) {
|
||||
case SGML_PARSER_CODE_OK:
|
||||
break;
|
||||
|
||||
case SGML_PARSER_CODE_INCOMPLETE:
|
||||
if (!complete) break;
|
||||
/* Error */
|
||||
default:
|
||||
complete = 1;
|
||||
}
|
||||
}
|
||||
|
||||
mem_free(buffer);
|
||||
|
||||
} else {
|
||||
code = parse_sgml(parser, source.string, source.length, complete);
|
||||
}
|
||||
|
||||
if (parser->root) {
|
||||
size_t root_offset = parser->stack.depth - 1;
|
||||
assert(!complete || parser->stack.depth > 0);
|
||||
|
||||
assert(!complete || root_offset == 0);
|
||||
|
||||
get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
|
||||
|
||||
while (!dom_stack_is_empty(&parser->stack))
|
||||
while (!dom_stack_is_empty(&parser->stack)) {
|
||||
get_dom_stack_top(&parser->stack)->immutable = 0;
|
||||
pop_dom_node(&parser->stack);
|
||||
}
|
||||
|
||||
if (normalize) {
|
||||
struct dom_stack stack;
|
||||
|
49
src/dom/test/test-sgml-parser-incremental
Executable file
49
src/dom/test/test-sgml-parser-incremental
Executable file
@ -0,0 +1,49 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright (c) 2005 Jonas Fonseca
|
||||
#
|
||||
|
||||
test_description='Test incremental parsing of SGML documents.
|
||||
|
||||
This test checks if the SGML parser correctly recovers during incremental
|
||||
parsing.
|
||||
'
|
||||
|
||||
. "$TEST_LIB"
|
||||
|
||||
# test_output_equals <desc> <size> <src> <out>
#
# Runs sgml-parser on <src> via stdin, passing <size> as the --stdin value,
# and registers a test named <desc> that compares the filtered parser output
# with <out>.
#
#   desc  test description; also used to derive the document URI
#   size  value handed to sgml-parser's --stdin option
#   src   document text piped to sgml-parser
#   out   expected output; its first line is discarded (see below)
#
# Writes the files "output" and "expected" in the current directory.
# NOTE(review): desc, size, src, out and URI are plain (global) shell
# variables; test_expect_success is presumably provided by $TEST_LIB.
test_output_equals () {
|
||||
desc="$1"; shift
|
||||
size="$1"; shift
|
||||
src="$1"; shift
|
||||
out="$1"; shift
|
||||

|
||||
# Build "test:<slug>" from the description: drop a leading "[...]" tag,
# turn runs of ':', '.', ',', blanks into '-', turn '_' into '-',
# lowercase everything, then delete any remaining character that is not
# alphanumeric or '-'.
URI="test:$(echo "$desc" | sed '
|
||||
s/^[ \t]*\[[^]]*\][ \t]*//;
|
||||
s/[:., \t][:., \t]*/-/g;
|
||||
s/_/-/g;
|
||||
# *cough*
|
||||
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
|
||||
s/[^a-zA-Z0-9-]//g;')"
|
||||

|
||||
# Pipe the document into the parser, strip the leading indentation matched
# by the first sed, and drop the last line of the result ('$d').
echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
|
||||
| sed -e 's/^ //' | sed -n '$d;p' > output
|
||||
# The expected file starts with the document line for this URI, followed by
# <out> from its second line on ('2,$p' skips the first line, which is empty
# when the caller opens the quoted string with a newline).
echo "#document: $URI" > expected
|
||||
echo "$out" | sed -n '2,$p' >> expected
|
||||

|
||||
# Byte-wise comparison of the two files decides the test result.
test_expect_success "$desc" 'cmp output expected'
|
||||
}
|
||||
|
||||
# Parse the same small HTML document once per read size, from 25 bytes down
# to 1 byte per read. Every iteration passes the same expected tree
# (html > body > p > "Hello World!") to test_output_equals, so the result
# must not depend on the read size.
for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
|
||||
test_output_equals \
|
||||
"Incrementally parse a small document reading $i bytes at a time." \
|
||||
"$i" \
|
||||
'<html><body><p>Hello World!</p></body></html>' \
|
||||
'
|
||||
element: html
|
||||
|
||||
element: body
|
||||
|
||||
element: p
|
||||
|
||||
#text: Hello World!'
|
||||

|
||||
done
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user