2005-12-28 22:44:03 -05:00
|
|
|
#!/bin/sh
|
|
|
|
#
|
|
|
|
# Copyright (c) 2005 Jonas Fonseca
|
|
|
|
#
|
|
|
|
|
2005-12-28 23:12:36 -05:00
|
|
|
test_description='Test the very basic parsing of SGML documents.
|
2005-12-28 22:44:03 -05:00
|
|
|
|
|
|
|
This test runs very basic features, like checking that nodes are placed
|
|
|
|
correctly in the DOM tree.
|
|
|
|
'
|
|
|
|
|
|
|
|
. ./libtest
|
|
|
|
|
2005-12-29 00:54:41 -05:00
|
|
|
# test_output_equals <description> <source> <expected-output> [<parser-option>...]
#
# Run sgml-parser on <source> and check that what it prints matches
# <expected-output>.
#
#   <description>      Test title handed to test_expect_success; also used to
#                      derive the document URI (see below).
#   <source>           Markup passed to the parser via --src.
#   <expected-output>  Expected parser output.  Its first line is discarded,
#                      so callers can open the quote on one line and start
#                      the real content on the next.
#   <parser-option>    Any remaining arguments are forwarded to sgml-parser
#                      unchanged (the callers in this file pass --print-lines).
#
# Side effects: overwrites the files `output` and `expected` in the current
# directory.
#
# NOTE(review): the `\t` inside the sed bracket expressions is only a tab
# under sed implementations that support that escape there (GNU sed does);
# confirm if this needs to run elsewhere.
test_output_equals () {
	desc="$1"; shift
	src="$1"; shift
	out="$1"; shift

	# Build a URI from the description: drop a leading "[...]" tag, turn
	# runs of punctuation/blanks and underscores into dashes, lowercase
	# everything, and strip whatever is left that is not alphanumeric or a
	# dash.  printf is used instead of echo so that backslashes in the
	# argument are never interpreted by the shell's echo builtin.
	URI="test:$(printf '%s\n' "$desc" | sed '
		s/^[ \t]*\[[^]]*\][ \t]*//;
		s/[:., \t][:., \t]*/-/g;
		s/_/-/g;
		# *cough*
		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
		s/[^a-zA-Z0-9-]//g;')"

	# "$@" (quoted) forwards each extra option as exactly one argument;
	# the unquoted form would be subject to word splitting and globbing.
	sgml-parser --uri "$URI" --src "$src" "$@" | sed 's/^ //' > output

	# The expected file starts with the document line, followed by the
	# caller's expected output minus its first line.  The expected text may
	# contain literal backslash sequences such as "\n", which must reach
	# the file verbatim -- hence printf rather than echo.
	printf '%s\n' "#document: $URI" > expected
	printf '%s\n' "$out" | sed -n '2,$p' >> expected

	test_expect_success "$desc" 'cmp -b output expected'
}
|
|
|
|
|
|
|
|
|
2005-12-28 22:44:03 -05:00
|
|
|
################################################################
|
2005-12-29 00:54:41 -05:00
|
|
|
# Parse various SGML node types.
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Parse a small document.' \
|
|
|
|
'<html><body><p>Hello World!</p></body></html>' \
|
|
|
|
'
|
|
|
|
element: html
|
|
|
|
element: body
|
|
|
|
element: p
|
|
|
|
#text: Hello World!'
|
|
|
|
|
2005-12-29 13:13:48 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse elements.' \
|
|
|
|
'<root><child attr="value" /><child2></><child3 >a</></root>' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
element: child
|
|
|
|
attribute: attr -> value
|
|
|
|
element: child2
|
|
|
|
element: child3
|
|
|
|
#text: a'
|
|
|
|
|
2006-01-02 10:26:01 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Check tag soup elements.' \
|
|
|
|
'<parent attr="value" <child:1></><child:2</>a</parent>' \
|
|
|
|
'
|
|
|
|
element: parent
|
|
|
|
attribute: attr -> value
|
|
|
|
element: child:1
|
|
|
|
element: child:2
|
|
|
|
#text: a'
|
|
|
|
|
2005-12-29 00:54:41 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse an enclosed comment.' \
|
|
|
|
'<root><!-- Hello World! --></root>' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
#comment: Hello World! '
|
|
|
|
|
2005-12-29 16:26:39 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse comment combinations.' \
|
2005-12-30 19:47:57 -05:00
|
|
|
'<root><!-- <!-- -- > --><!--foo--><!----></root>' \
|
2005-12-29 16:26:39 -05:00
|
|
|
'
|
|
|
|
element: root
|
|
|
|
#comment: <!-- -- >
|
2005-12-30 19:47:57 -05:00
|
|
|
#comment: foo
|
2005-12-29 16:26:39 -05:00
|
|
|
#comment: '
|
|
|
|
|
|
|
|
test_output_equals \
|
2005-12-30 19:47:57 -05:00
|
|
|
'Parse bad comment.' \
|
|
|
|
'<!--->s' \
|
2005-12-29 16:26:39 -05:00
|
|
|
'
|
2005-12-30 19:47:57 -05:00
|
|
|
#comment: ->s'
|
2005-12-29 16:26:39 -05:00
|
|
|
|
2005-12-29 00:54:41 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse an enclosed CDATA section.' \
|
|
|
|
'<root><![CDATA[...] ]>...]]></root>' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
#cdata-section: ...] ]>...'
|
|
|
|
|
2005-12-30 19:47:57 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse non-enclosed CDATA section.' \
|
|
|
|
'<![CDATA[...]]>' \
|
|
|
|
'
|
|
|
|
#cdata-section: ...'
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Parse a bad CDATA section.' \
|
|
|
|
'<![CDATA[...' \
|
|
|
|
'
|
|
|
|
#cdata-section: ...'
|
|
|
|
|
2005-12-29 00:54:41 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse attributes.' \
|
|
|
|
'<root lang="fr" attr name="value with &foo; <stuff"></root>' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
attribute: lang -> fr
|
|
|
|
attribute: attr ->
|
|
|
|
attribute: name -> value with &foo; <stuff'
|
|
|
|
|
|
|
|
test_output_equals \
|
2005-12-29 13:13:48 -05:00
|
|
|
'Parse attributes with garbage.' \
|
|
|
|
"<root a=b c='d' e'f' g= h i = j k =></root>" \
|
2005-12-29 00:54:41 -05:00
|
|
|
'
|
|
|
|
element: root
|
2005-12-29 13:13:48 -05:00
|
|
|
attribute: a -> b
|
|
|
|
attribute: c -> d
|
|
|
|
attribute: g -> h
|
|
|
|
attribute: i -> j
|
|
|
|
attribute: k -> '
|
|
|
|
|
2005-12-29 14:38:43 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse attribute with non-quoted values.' \
|
|
|
|
'<root color=#abc path=/to/%61-&\one";files/>...' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
attribute: color -> #abc
|
|
|
|
attribute: path -> /to/%61-&\one";files
|
|
|
|
#text: ...'
|
|
|
|
|
2005-12-29 13:13:48 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse entity references.' \
|
|
|
|
'&-*' \
|
|
|
|
'
|
|
|
|
entity-reference: amp
|
|
|
|
#text: -
|
|
|
|
entity-reference: #42'
|
|
|
|
|
|
|
|
# Just how these should be gracefully handled is not clear to me.
|
|
|
|
test_output_equals \
|
|
|
|
'Parse badly formatted entity references.' \
|
|
|
|
'& m33p;-&.:-copy;-&;-&#;-&#xx;' \
|
|
|
|
'
|
|
|
|
#text: & m33p;
|
|
|
|
#text: -
|
|
|
|
entity-reference: .:-copy
|
|
|
|
#text: -
|
|
|
|
#text: &;
|
|
|
|
#text: -
|
|
|
|
entity-reference: #
|
|
|
|
#text: -
|
|
|
|
entity-reference: #xx'
|
2005-12-29 00:54:41 -05:00
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Parse processing instructions.' \
|
|
|
|
'<?xml encoding="UTF8"?>
|
|
|
|
...
|
2005-12-29 13:13:48 -05:00
|
|
|
<?ecmascript
|
2005-12-29 00:54:41 -05:00
|
|
|
var val=2;
|
|
|
|
?>' \
|
|
|
|
'
|
|
|
|
proc-instruction: xml -> encoding="UTF8"
|
|
|
|
attribute: encoding -> UTF8
|
|
|
|
#text: \n...\n
|
2005-12-29 12:00:26 -05:00
|
|
|
proc-instruction: ecmascript -> var val=2;\n'
|
2005-12-28 22:44:03 -05:00
|
|
|
|
2005-12-29 16:26:39 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse XML processing instructions.' \
|
|
|
|
'<?xml version="1.0" />?><?xml />-' \
|
|
|
|
'
|
|
|
|
proc-instruction: xml -> version="1.0" />
|
|
|
|
attribute: version -> 1.0
|
|
|
|
proc-instruction: xml -> /'
|
|
|
|
|
2005-12-30 21:13:39 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse XML stylesheet processing instructions.' \
|
|
|
|
'<?xml-stylesheet type="text/xsl" href="url"?>' \
|
|
|
|
'
|
|
|
|
proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
|
|
|
|
attribute: type -> text/xsl
|
|
|
|
attribute: href -> url'
|
|
|
|
|
2005-12-29 15:52:27 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Parse exotic processing instructions.' \
|
|
|
|
'<?xml ?+>+?>-?>-<?js?>-<??>-' \
|
|
|
|
'
|
|
|
|
proc-instruction: xml -> ?+>+
|
|
|
|
#text: -?>-
|
|
|
|
proc-instruction: js ->
|
|
|
|
#text: -
|
|
|
|
proc-instruction: ->
|
|
|
|
#text: -'
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Parse incorrect processing instructions.' \
|
|
|
|
'<?js<?>-<?<??>-<?xml <=";&?>-<?' \
|
|
|
|
'
|
|
|
|
proc-instruction: js -> <
|
|
|
|
#text: -
|
|
|
|
proc-instruction: -> <?
|
|
|
|
#text: -
|
|
|
|
proc-instruction: xml -> <=";&
|
|
|
|
#text: -'
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Parse incorrect processing instructions (II).' \
|
|
|
|
'<?><?' \
|
|
|
|
'
|
|
|
|
proc-instruction: -> ><?'
|
|
|
|
|
2005-12-29 13:13:48 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Skip spaces not inside text.' \
|
|
|
|
'<
|
|
|
|
root
|
|
|
|
ns:attr
|
|
|
|
=
|
|
|
|
"value"
|
|
|
|
><?
|
|
|
|
target
|
|
|
|
data?>< / root >' \
|
|
|
|
'
|
|
|
|
element: root
|
|
|
|
attribute: ns:attr -> value
|
|
|
|
proc-instruction: target -> data'
|
|
|
|
|
2006-01-01 19:48:08 -05:00
|
|
|
test_output_equals \
|
|
|
|
'Check line numbers. (I)' \
|
|
|
|
'<!-- line --> number <one />' \
|
|
|
|
'
|
|
|
|
1' \
|
|
|
|
--print-lines
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Check line numbers. (II)' \
|
|
|
|
'<
|
|
|
|
line:2
|
|
|
|
line:3
|
|
|
|
=
|
|
|
|
"line:5"
|
|
|
|
><?xml
|
|
|
|
line:7="..."
|
|
|
|
line:8
|
|
|
|
=
|
|
|
|
'\''...'\''></line:10>' \
|
|
|
|
'
|
|
|
|
10' \
|
|
|
|
--print-lines
|
|
|
|
|
|
|
|
test_output_equals \
|
|
|
|
'Check line numbers. (III)' \
|
|
|
|
'1
|
|
|
|
2
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
|
6
|
|
|
|
7
|
|
|
|
8' \
|
|
|
|
'
|
|
|
|
8' \
|
|
|
|
--print-lines
|
|
|
|
|
|
|
|
|
2005-12-28 22:44:03 -05:00
|
|
|
test_done
|