1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00
elinks/src/dom/test/test-sgml-parser-basic

385 lines
7.1 KiB
Plaintext
Raw Normal View History

#!/bin/sh
#
# Copyright (c) 2005 Jonas Fonseca
#
# Human-readable summary of what this test script covers.
# NOTE(review): presumably read by the test library sourced below -- confirm
# against the file named by $TEST_LIB.
test_description='Test the very basic parsing of SGML documents.
This test runs very basic features, like checking that nodes are placed
correctly in the DOM tree.
'
# Source the shared test library whose path is given by $TEST_LIB.
# test_expect_success and test_done are called in this script but are not
# defined in it, so they are expected to come from that library.
. "$TEST_LIB"
# Run sgml-parser on a source string and compare what it prints with the
# expected text.
#
# Arguments:
#   $1   - test description; also used to derive the document URI
#   $2   - SGML source handed to sgml-parser via --src
#   $3   - expected output; its first line is dropped and replaced by a
#          "#document: <URI>" line
#   $4.. - optional extra options passed through to sgml-parser
#
# Writes the files "output" and "expected" in the current directory and
# reports the result of comparing them through test_expect_success.
test_output_equals () {
	desc="$1"; shift
	src="$1"; shift
	out="$1"; shift
	# Derive a URI from the description: drop a leading "[...]" tag, collapse
	# runs of punctuation/blanks into "-", map "_" to "-", lowercase, and
	# finally strip everything that is not alphanumeric or "-".
	URI="test:$(printf '%s\n' "$desc" | sed '
s/^[ \t]*\[[^]]*\][ \t]*//;
s/[:., \t][:., \t]*/-/g;
s/_/-/g;
# *cough*
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
s/[^a-zA-Z0-9-]//g;')"
	# "$@" is quoted so each extra option stays exactly one word and is not
	# subject to field splitting or pathname expansion.
	sgml-parser --uri "$URI" --src "$src" "$@" | sed 's/^ //' > output
	printf '%s\n' "#document: $URI" > expected
	# printf rather than echo: the expected text can contain literal
	# backslash sequences, which some /bin/sh echo implementations would
	# interpret instead of printing verbatim.
	printf '%s\n' "$out" | sed -n '2,$p' >> expected
	test_expect_success "$desc" 'cmp output expected'
}
# Check that sgml-parser, run with --incomplete, exits with status 1 for the
# given source.
#
# Arguments:
#   $1 - test description; also the basis for the document URI
#   $2 - SGML source fragment passed to sgml-parser via --src
#
# The parser's standard output is discarded; only its exit status is used.
test_expect_incomplete () {
	desc=$1
	src=$2
	shift 2
	# Build the URI in two steps: first the slug derived from the
	# description, then the "test:" scheme prefix.
	URI=$(echo "$desc" | sed '
s/^[ \t]*\[[^]]*\][ \t]*//;
s/[:., \t][:., \t]*/-/g;
s/_/-/g;
# *cough*
y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
s/[^a-zA-Z0-9-]//g;')
	URI="test:$URI"
	sgml-parser --uri "$URI" --src "$src" --incomplete >/dev/null
	# $? is expanded here, inside the double quotes, so the check string
	# carries the parser's exit status as a literal number.
	test_expect_success "$desc" "test $? = 1"
}
################################################################
# Parse various SGML node types.
#
# Each call passes a description, the SGML source and the expected dump.
# The expected text opens with a newline: test_output_equals drops that first
# (empty) line and puts the "#document: <URI>" line in its place.

# Nested elements with a single text node.
test_output_equals \
'Parse a small document.' \
'<html><body><p>Hello World!</p></body></html>' \
'
element: html
element: body
element: p
#text: Hello World!'
# A self-closing tag with an attribute, and elements closed with "</>".
test_output_equals \
'Parse elements.' \
'<root><child attr="value" /><child2></><child3 >a</></root>' \
'
element: root
element: child
attribute: attr -> value
element: child2
element: child3
#text: a'
# Tag soup: the <parent ...> and <child:2 tags lack their closing ">".
test_output_equals \
'Parse tag soup elements.' \
'<parent attr="value" <child:1></><child:2</>a</parent>' \
'
element: parent
attribute: attr -> value
element: child:1
element: child:2
#text: a'
# Comments: a simple one, several back to back, and odd delimiters.
test_output_equals \
'Parse an enclosed comment.' \
'<root><!-- Hello World! --></root>' \
'
element: root
#comment: Hello World! '
test_output_equals \
'Parse comment combinations. (I)' \
'<root><!-- <!-- -- > --><!--foo--><!----></root>' \
'
element: root
#comment: <!-- -- >
#comment: foo
#comment: '
test_output_equals \
'Parse comment combinations. (II).' \
'<! -- comment -->s<!-->-->t<!----->u' \
'
#comment: comment
#text: s
#comment: >
#text: t
#comment: -
#text: u'
# "<!--->" is not treated as a closed comment: the expected dump has the rest
# of the input as comment data.
test_output_equals \
'Parse bad comment.' \
'<!--->s' \
'
#comment: ->s'
# "<!>" yields no node of its own; only the following text is expected.
test_output_equals \
'Parse empty notation.' \
'<!>s' \
'
#text: s'
# CDATA sections: inside an element, at top level, and without the "]]>" end.
test_output_equals \
'Parse an enclosed CDATA section.' \
'<root><![CDATA[...] ]>...]]></root>' \
'
element: root
#cdata-section: ...] ]>...'
test_output_equals \
'Parse non-enclosed CDATA section.' \
'<![CDATA[...]]>' \
'
#cdata-section: ...'
test_output_equals \
'Parse a bad CDATA section.' \
'<![CDATA[...' \
'
#cdata-section: ...'
# Attributes: quoted values, an attribute without a value, and a quoted value
# that contains "&foo;" and "<".
test_output_equals \
'Parse attributes.' \
'<root lang="fr" attr name="value with &foo; <stuff"></root>' \
'
element: root
attribute: lang -> fr
attribute: attr ->
attribute: name -> value with &foo; <stuff'
# Garbage between attributes: the e'f' token has no entry in the expected
# dump, while "g= h", "i = j" and "k =" are still expected as attributes.
test_output_equals \
'Parse attributes with garbage.' \
"<root a=b c='d' e'f' g= h i = j k =></root>" \
'
element: root
attribute: a -> b
attribute: c -> d
attribute: g -> h
attribute: i -> j
attribute: k -> '
# Unquoted values containing #, /, %, &, \, " and ; are expected verbatim.
test_output_equals \
'Parse attribute with non-quoted values.' \
'<root color=#abc path=/to/%61-&\one";files/>...' \
'
element: root
attribute: color -> #abc
attribute: path -> /to/%61-&\one";files
#text: ...'
# Named and numeric entity references separated by plain text.
test_output_equals \
'Parse entity references.' \
'&amp;-&#42;' \
'
entity-reference: amp
#text: -
entity-reference: #42'
# Just how these should be gracefully handled is not clear to me.
test_output_equals \
'Parse badly formatted entity references.' \
'& m33p;-&.:-copy;-&;-&#;-&#xx;' \
'
#text: & m33p;
#text: -
entity-reference: .:-copy
#text: -
#text: &;
#text: -
entity-reference: #
#text: -
entity-reference: #xx'
test_output_equals \
'Parse processing instructions.' \
'<?xml encoding="UTF8"?>
...
<?ecmascript
var val=2;
?>' \
'
proc-instruction: xml -> encoding="UTF8"
attribute: encoding -> UTF8
#text: \n...\n
proc-instruction: ecmascript -> var val=2;\n'
test_output_equals \
'Parse XML processing instructions.' \
'<?xml version="1.0" />?><?xml />-' \
'
proc-instruction: xml -> version="1.0" />
attribute: version -> 1.0
proc-instruction: xml -> />-'
test_output_equals \
'Parse XML stylesheet processing instructions.' \
'<?xml-stylesheet type="text/xsl" href="url"?>' \
'
proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
attribute: type -> text/xsl
attribute: href -> url'
test_output_equals \
'Parse exotic processing instructions.' \
'<?xml ?+>+?>-?>-<?js?>-<??>-' \
'
proc-instruction: xml -> ?+>+
#text: -?>-
proc-instruction: js ->
#text: -
proc-instruction: ->
#text: -'
test_output_equals \
'Parse incorrect processing instructions.' \
'<?js<?>-<?<??>-<?xml <=";&?>-<?' \
'
proc-instruction: js -> <
#text: -
proc-instruction: -> <?
#text: -
proc-instruction: xml -> <=";&
#text: -'
test_output_equals \
'Parse incorrect processing instructions (II).' \
'<?><?' \
'
proc-instruction: -> ><?'
test_output_equals \
'Skip spaces not inside text.' \
'<
root
ns:attr
=
"value"
><?
target
data?>< / root >' \
'
element: root
attribute: ns:attr -> value
proc-instruction: target -> data'
# Line number checks. The fourth argument, --print-lines, is passed through
# to sgml-parser by test_output_equals; the expected output is then just a
# number instead of a node dump.
test_output_equals \
'Check line numbers. (I)' \
'<!-- line --> number <one />' \
'
1' \
--print-lines
# Ten-line source; the '\'' sequences embed single quotes in the
# single-quoted shell string.
test_output_equals \
'Check line numbers. (II)' \
'<
line:2
line:3
=
"line:5"
><?xml
line:7="..."
line:8
=
'\''...'\''></line:10>' \
'
10' \
--print-lines
# Plain text spread over eight lines.
test_output_equals \
'Check line numbers. (III)' \
'1
2
3
4
5
6
7
8' \
'
8' \
--print-lines
# Incomplete input. test_expect_incomplete runs sgml-parser with --incomplete
# on each truncated fragment and expects the parser to exit with status 1.

# Truncated comments.
test_expect_incomplete \
'Check incomplete comment. (I)' \
'<!-'
test_expect_incomplete \
'Check incomplete comment. (II)' \
'<!-- ... '
# Truncated notations.
test_expect_incomplete \
'Check incomplete notation. (I)' \
'<!'
test_expect_incomplete \
'Check incomplete notation. (II)' \
'<!D'
# Truncated CDATA sections.
test_expect_incomplete \
'Check incomplete cdata section. (I)' \
'<![CDATA[ ... '
test_expect_incomplete \
'Check incomplete cdata section. (II)' \
'<![CDAT'
# Truncated start and end tags.
test_expect_incomplete \
'Check incomplete element. (I)' \
'<elem...'
test_expect_incomplete \
'Check incomplete element. (II)' \
'<'
test_expect_incomplete \
'Check incomplete element end. (I)' \
'<a></a'
test_expect_incomplete \
'Check incomplete element end. (II)' \
'<a></'
# Truncated attributes, with unquoted and quoted values.
test_expect_incomplete \
'Check incomplete attribute.' \
'<element attr...'
test_expect_incomplete \
'Check incomplete attribute value.' \
'<element attr=...'
test_expect_incomplete \
'Check incomplete attribute quoted value. (I)' \
'<element attr="...'
test_expect_incomplete \
'Check incomplete attribute quoted value. (II)' \
"<element attr='..."
# Truncated processing instructions.
test_expect_incomplete \
'Check incomplete processing instruction. (I)' \
'<?xml'
test_expect_incomplete \
'Check incomplete processing instruction. (II)' \
'<?xml attr...'
# A DOCTYPE notation without its closing ">".
test_expect_incomplete \
'Check incomplete notation.' \
'<!DOCTYPE html PUBLIC ...'
# Entity references without the terminating ";".
test_expect_incomplete \
'Check incomplete reference. (I)' \
'&#123456789'
test_expect_incomplete \
'Check incomplete reference. (II)' \
'&amp'
# (stray history-view timestamp, not part of the script: 2006-01-02 16:35:03 -05:00)
# Markup-free text is also expected to make sgml-parser --incomplete exit
# with status 1.
test_expect_incomplete \
'Check incomplete text.' \
'plain text is always incomplete (if incomplete)'
# End of the test script. test_done is not defined in this file.
# NOTE(review): presumably provided by the library sourced via $TEST_LIB.
test_done