mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Add support for testing normalization using the DOM configuration module
This commit is contained in:
parent
cc61578fcb
commit
2eba71d95b
@ -11,6 +11,7 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "dom/configuration.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/sgml/parser.h"
|
||||
#include "dom/stack.h"
|
||||
@ -260,7 +261,10 @@ main(int argc, char *argv[])
|
||||
struct sgml_parser *parser;
|
||||
enum sgml_document_type doctype = SGML_DOCTYPE_HTML;
|
||||
enum sgml_parser_flag flags = 0;
|
||||
enum sgml_parser_type type = SGML_PARSER_STREAM;
|
||||
enum sgml_parser_code code = 0;
|
||||
enum dom_config_flag normalize_flags = 0;
|
||||
int normalize = 0;
|
||||
int complete = 1;
|
||||
struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
|
||||
struct dom_string source = INIT_DOM_STRING("(no source)", -1);
|
||||
@ -298,6 +302,20 @@ main(int argc, char *argv[])
|
||||
set_dom_string(&source, argv[i], strlen(argv[i]));
|
||||
}
|
||||
|
||||
} else if (!strncmp(arg, "normalize", 9)) {
|
||||
arg += 9;
|
||||
if (*arg == '=') {
|
||||
arg++;
|
||||
} else {
|
||||
i++;
|
||||
if (i >= argc)
|
||||
die("--normalize expects a string");
|
||||
arg = argv[i];
|
||||
}
|
||||
normalize = 1;
|
||||
normalize_flags = parse_dom_config(arg, ',');
|
||||
type = SGML_PARSER_TREE;
|
||||
|
||||
} else if (!strcmp(arg, "print-lines")) {
|
||||
flags |= SGML_PARSER_COUNT_LINES;
|
||||
|
||||
@ -316,10 +334,13 @@ main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri, flags);
|
||||
parser = init_sgml_parser(type, doctype, &uri, flags);
|
||||
if (!parser) return 1;
|
||||
|
||||
parser->error_func = sgml_error_function;
|
||||
if (normalize)
|
||||
add_dom_config_normalizer(&parser->stack, normalize_flags);
|
||||
else
|
||||
add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
|
||||
|
||||
code = parse_sgml(parser, source.string, source.length, complete);
|
||||
@ -330,13 +351,30 @@ main(int argc, char *argv[])
|
||||
|
||||
get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
|
||||
|
||||
/* For SGML_PARSER_STREAM this will free the DOM
|
||||
* root node. */
|
||||
while (!dom_stack_is_empty(&parser->stack))
|
||||
pop_dom_node(&parser->stack);
|
||||
|
||||
if (normalize) {
|
||||
struct dom_stack stack;
|
||||
|
||||
/* Note, that we cannot free nodes when walking the DOM
|
||||
* tree since walk_dom_node() uses an index to traverse
|
||||
* the tree. */
|
||||
init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
|
||||
/* XXX: This context needs to be added first because it
|
||||
* assumes the parser can be accessed via
|
||||
* stack->contexts[0].data. */
|
||||
add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info);
|
||||
walk_dom_nodes(&stack, parser->root);
|
||||
done_dom_stack(&stack);
|
||||
done_dom_node(parser->root);
|
||||
}
|
||||
}
|
||||
|
||||
done_sgml_parser(parser);
|
||||
#ifdef DEBUG_MEMLEAK
|
||||
check_memory_leaks();
|
||||
#endif
|
||||
|
||||
return code;
|
||||
}
|
||||
|
236
src/dom/test/test-dom-configuration-basic
Executable file
236
src/dom/test/test-dom-configuration-basic
Executable file
@ -0,0 +1,236 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright (c) 2005 Jonas Fonseca
|
||||
#
|
||||
|
||||
test_description='Test the DOM configuration module
|
||||
|
||||
This test checks that the normalization performed by the DOM configuration
|
||||
is done correctly.
|
||||
'
|
||||
|
||||
. "$TEST_LIB"
|
||||
|
||||
# test_normalize_output_equals <desc> <config> <src> <out>
#
# Run sgml-parser on <src> with the normalization described by <config>
# and check that what it prints matches <out>.
#
#   desc    Test description; also used to derive the document URI.
#   config  Comma separated configuration string handed to --normalize.
#   src     SGML source handed to --src.
#   out     Expected output. Its first line is discarded, so callers can
#           open the quoted text with a newline for readability.
#
# Writes the files `output' and `expected' in the current directory and
# reports the comparison through test_expect_success.
test_normalize_output_equals () {
	desc="$1"; shift
	config="$1"; shift
	src="$1"; shift
	out="$1"; shift

	# Derive a URI from the description: strip a leading "[...]" tag,
	# collapse runs of separators into a single dash, lowercase, and
	# finally drop everything that is not alphanumeric or a dash.
	URI="test:$(echo "$desc" | sed '
		s/^[ \t]*\[[^]]*\][ \t]*//;
		s/[:., \t][:., \t]*/-/g;
		s/_/-/g;
		# *cough*
		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
		s/[^a-zA-Z0-9-]//g;')"

	# --src only needs to be given once; a repeated option would just
	# overwrite the same source string in the parser test program.
	sgml-parser --src "$src" --normalize "$config" --uri "$URI" | sed 's/^ //' > output

	# The expected file starts with the document line; the first (empty)
	# line of $out is skipped.
	echo "#document: $URI" > expected
	echo "$out" | sed -n '2,$p' >> expected

	test_expect_success "$desc" 'cmp output expected'
}
|
||||
|
||||
|
||||
## Config strings ###########################################################
|
||||
|
||||
# Configuration strings handed to --normalize. NOOP names all four
# parameters used by these tests; each of the following strings leaves
# exactly one of them out, and NORM1 names none.
#
# NOTE(review): the parameter is spelled `cdata-sections' everywhere so
# ENTITIES and NOWSTEXT agree with NOOP and NOCOMMENTS -- confirm against
# the names accepted by the DOM configuration parser.
NOOP='cdata-sections,comments,element-content-whitespace,entities'
NOCOMMENTS='cdata-sections,element-content-whitespace,entities'
CDATA2TEXT='comments,element-content-whitespace,entities'
ENTITIES='cdata-sections,comments,element-content-whitespace'
NOWSTEXT='cdata-sections,comments,entities'
NORM1=''
|
||||
|
||||
|
||||
## No-ops ###################################################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Normalization no-op.' \
|
||||
"$NOOP" \
|
||||
'<roswell> <![CDATA[| that |]]><!-- ends --> well</roswell>' \
|
||||
'
|
||||
element: roswell
|
||||
#text:
|
||||
#cdata-section: | that |
|
||||
#comment: ends
|
||||
#text: well'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Keep comments.' \
|
||||
"$NOOP" \
|
||||
'and<!-- comment:1 -->or<!-- comment:2 -->' \
|
||||
'
|
||||
#text: and
|
||||
#comment: comment:1
|
||||
#text: or
|
||||
#comment: comment:2 '
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Keep CDATA sections ' \
|
||||
"$NOOP" \
|
||||
'<![CDATA[and]]>or<![CDATA[maybe]]>' \
|
||||
'
|
||||
#cdata-section: and
|
||||
#text: or
|
||||
#cdata-section: maybe'
|
||||
|
||||
|
||||
## Comments #################################################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove comments. (I)' \
|
||||
"$NOCOMMENTS" \
|
||||
"<no><!-- comment -->?</no>" \
|
||||
'
|
||||
element: no
|
||||
#text: ?'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove comments. (II)' \
|
||||
"$NOCOMMENTS" \
|
||||
'<!-- comment:1 -->and<!-- comment:2 -->' \
|
||||
'
|
||||
#text: and'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove comments. (III)' \
|
||||
"$NOCOMMENTS" \
|
||||
'nothing to see <!-- comment -->here' \
|
||||
'
|
||||
#text: nothing to see here'
|
||||
|
||||
|
||||
## Entities #################################################################
|
||||
|
||||
# Entities should be shown 'verbatim' here after expansion.
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Expand entities. (I)' \
|
||||
"$ENTITIES" \
|
||||
'a<b>c' \
|
||||
'
|
||||
#text: a<b>c'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Expand entities. (II)' \
|
||||
"$ENTITIES" \
|
||||
'&bad-entity&good-entity;' \
|
||||
'
|
||||
#text: &bad-entity;&good-entity;'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Expand entities. (III)' \
|
||||
"$ENTITIES" \
|
||||
'<a>&b;</a>' \
|
||||
'
|
||||
element: a
|
||||
#text: &b;'
|
||||
|
||||
|
||||
## CDATA Sections ###########################################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Replace CDATA section with text. (I)' \
|
||||
"$CDATA2TEXT" \
|
||||
'<![CDATA[a small text snippet]]>' \
|
||||
'
|
||||
#text: a small text snippet'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Replace CDATA section with text. (II)' \
|
||||
"$CDATA2TEXT" \
|
||||
'<![CDATA[a small]]> <![CDATA[text snippet]]>' \
|
||||
'
|
||||
#text: a small text snippet'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Replace CDATA section with text. (III)' \
|
||||
"$CDATA2TEXT" \
|
||||
'before <![CDATA[and]]> after' \
|
||||
'
|
||||
#text: before and after'
|
||||
|
||||
|
||||
## Element Content Whitespace ###############################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove element content whitespace. (I)' \
|
||||
"$NOWSTEXT" \
|
||||
'<a>
|
||||
<b>some text</b>
|
||||
</a>' \
|
||||
'
|
||||
element: a
|
||||
element: b
|
||||
#text: some text' \
|
||||
|
||||
# I haven't read the specs about this thing, for now it just blasts all
|
||||
# space-only text nodes. Probably not the wanted behaviour all of the time.
|
||||
# --jonas
|
||||
test_normalize_output_equals \
|
||||
'Remove element content whitespace. (II)' \
|
||||
"$NOWSTEXT" \
|
||||
'<e>space between &this; &that; gets removed</e>' \
|
||||
'
|
||||
element: e
|
||||
#text: space between
|
||||
entity-reference: this
|
||||
entity-reference: that
|
||||
#text: gets removed'
|
||||
|
||||
|
||||
## Mixes ####################################################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Normalization mix #1. (I)' \
|
||||
"$NORM1" \
|
||||
'before <![CDATA[and]]> after &some;<!--comments--> remain' \
|
||||
'
|
||||
#text: before and after &some; remain'
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Normalization mix #1. (II)' \
|
||||
"$NORM1" \
|
||||
'<!--a-->b&c; <![CDATA[d]]>' \
|
||||
'
|
||||
#text: b&c;d'
|
||||
|
||||
|
||||
## Special ELinks Extensions ################################################
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove unknown (HTML) elements and attributes. (I)' \
|
||||
"$NOOP,unknown" \
|
||||
'<html wack="..."><title w00t="...">where?<doit>here!</doit></title></html>' \
|
||||
'
|
||||
element: html
|
||||
element: title
|
||||
#text: where?' \
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Remove unknown (HTML) elements and attributes. (II)' \
|
||||
"$NOOP,unknown" \
|
||||
'<x y=""><z></z></x> aint no HTML' \
|
||||
'
|
||||
#text: aint no HTML' \
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Normalize whitespace. (I)' \
|
||||
"$NOOP,normalize-whitespace" \
|
||||
'Here is a
|
||||
|
||||
|
||||
|
||||
lot of useless space.' \
|
||||
'
|
||||
#text: Here is a lot of useless space.' \
|
||||
|
||||
test_normalize_output_equals \
|
||||
'Normalize whitespace. (II)' \
|
||||
"$CDATA2TEXT,normalize-whitespace" \
|
||||
'Could we <![CDATA[ please ]]> read that again?' \
|
||||
'
|
||||
#text: Could we please read that again?' \
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user