mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Add test for incremental SGML parsing
It is a loop that parses the same small document with various read sizes. The sgml-parser program is updated to take a --stdin option, which takes the read size as a required parameter.
This commit is contained in:
parent
b25cd27232
commit
0f8aa77ebb
@ -266,6 +266,7 @@ main(int argc, char *argv[])
|
|||||||
enum dom_config_flag normalize_flags = 0;
|
enum dom_config_flag normalize_flags = 0;
|
||||||
int normalize = 0;
|
int normalize = 0;
|
||||||
int complete = 1;
|
int complete = 1;
|
||||||
|
size_t read_stdin = 0;
|
||||||
struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
|
struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
|
||||||
struct dom_string source = INIT_DOM_STRING("(no source)", -1);
|
struct dom_string source = INIT_DOM_STRING("(no source)", -1);
|
||||||
int i;
|
int i;
|
||||||
@ -302,6 +303,20 @@ main(int argc, char *argv[])
|
|||||||
set_dom_string(&source, argv[i], strlen(argv[i]));
|
set_dom_string(&source, argv[i], strlen(argv[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else if (!strncmp(arg, "stdin", 5)) {
|
||||||
|
arg += 5;
|
||||||
|
if (*arg == '=') {
|
||||||
|
arg++;
|
||||||
|
read_stdin = atoi(arg);
|
||||||
|
set_dom_string(&source, arg, strlen(arg));
|
||||||
|
} else {
|
||||||
|
i++;
|
||||||
|
if (i >= argc)
|
||||||
|
die("--stdin expects a number");
|
||||||
|
read_stdin = atoi(argv[i]);
|
||||||
|
}
|
||||||
|
flags |= SGML_PARSER_INCREMENTAL;
|
||||||
|
|
||||||
} else if (!strncmp(arg, "normalize", 9)) {
|
} else if (!strncmp(arg, "normalize", 9)) {
|
||||||
arg += 9;
|
arg += 9;
|
||||||
if (*arg == '=') {
|
if (*arg == '=') {
|
||||||
@ -343,16 +358,49 @@ main(int argc, char *argv[])
|
|||||||
else
|
else
|
||||||
add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
|
add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
|
||||||
|
|
||||||
code = parse_sgml(parser, source.string, source.length, complete);
|
if (read_stdin > 0) {
|
||||||
|
unsigned char *buffer;
|
||||||
|
|
||||||
|
buffer = mem_alloc(read_stdin);
|
||||||
|
if (!buffer)
|
||||||
|
die("Cannot allocate buffer");
|
||||||
|
|
||||||
|
complete = 0;
|
||||||
|
|
||||||
|
while (!complete) {
|
||||||
|
size_t size = fread(buffer, 1, read_stdin, stdin);
|
||||||
|
|
||||||
|
if (ferror(stdin))
|
||||||
|
die("error reading from stdin");
|
||||||
|
|
||||||
|
complete = feof(stdin);
|
||||||
|
|
||||||
|
code = parse_sgml(parser, buffer, size, complete);
|
||||||
|
switch (code) {
|
||||||
|
case SGML_PARSER_CODE_OK:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SGML_PARSER_CODE_INCOMPLETE:
|
||||||
|
if (!complete) break;
|
||||||
|
/* Error */
|
||||||
|
default:
|
||||||
|
complete = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mem_free(buffer);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
code = parse_sgml(parser, source.string, source.length, complete);
|
||||||
|
}
|
||||||
|
|
||||||
if (parser->root) {
|
if (parser->root) {
|
||||||
size_t root_offset = parser->stack.depth - 1;
|
assert(!complete || parser->stack.depth > 0);
|
||||||
|
|
||||||
assert(!complete || root_offset == 0);
|
while (!dom_stack_is_empty(&parser->stack)) {
|
||||||
|
get_dom_stack_top(&parser->stack)->immutable = 0;
|
||||||
get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
|
|
||||||
|
|
||||||
while (!dom_stack_is_empty(&parser->stack))
|
|
||||||
pop_dom_node(&parser->stack);
|
pop_dom_node(&parser->stack);
|
||||||
|
}
|
||||||
|
|
||||||
if (normalize) {
|
if (normalize) {
|
||||||
struct dom_stack stack;
|
struct dom_stack stack;
|
||||||
|
49
src/dom/test/test-sgml-parser-incremental
Executable file
49
src/dom/test/test-sgml-parser-incremental
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Copyright (c) 2005 Jonas Fonseca
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='Test incremental parsing of SGML documents.
|
||||||
|
|
||||||
|
This test checks if the SGML parser correctly recovers during incremental
|
||||||
|
parsing.
|
||||||
|
'
|
||||||
|
|
||||||
|
. "$TEST_LIB"
|
||||||
|
|
||||||
|
test_output_equals () {
|
||||||
|
desc="$1"; shift
|
||||||
|
size="$1"; shift
|
||||||
|
src="$1"; shift
|
||||||
|
out="$1"; shift
|
||||||
|
|
||||||
|
URI="test:$(echo "$desc" | sed '
|
||||||
|
s/^[ \t]*\[[^]]*\][ \t]*//;
|
||||||
|
s/[:., \t][:., \t]*/-/g;
|
||||||
|
s/_/-/g;
|
||||||
|
# *cough*
|
||||||
|
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
|
||||||
|
s/[^a-zA-Z0-9-]//g;')"
|
||||||
|
|
||||||
|
echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
|
||||||
|
| sed -e 's/^ //' | sed -n '$d;p' > output
|
||||||
|
echo "#document: $URI" > expected
|
||||||
|
echo "$out" | sed -n '2,$p' >> expected
|
||||||
|
|
||||||
|
test_expect_success "$desc" 'cmp output expected'
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
|
||||||
|
test_output_equals \
|
||||||
|
"Incrementally parse a small document reading $i bytes at a time." \
|
||||||
|
"$i" \
|
||||||
|
'<html><body><p>Hello World!</p></body></html>' \
|
||||||
|
'
|
||||||
|
element: html
|
||||||
|
element: body
|
||||||
|
element: p
|
||||||
|
#text: Hello World!'
|
||||||
|
|
||||||
|
done
|
||||||
|
|
||||||
|
test_done
|
Loading…
Reference in New Issue
Block a user