#!/bin/sh
#
# Copyright (c) 2005 Jonas Fonseca
#

test_description='Test the very basic parsing of SGML documents. This test runs very basic features, like checking that nodes are placed correctly in the DOM tree. '

. ./libtest

# test_output_equals DESC SRC OUT [PARSER-OPTION...]
#
# Run sgml-parser on the document SRC and check its output against OUT.
#
#   DESC  Test description. It is also turned into the document URI
#         ("test:" followed by a lower-cased, dash-separated slug).
#   SRC   Document source handed to sgml-parser via --src.
#   OUT   Expected output. Its first line is dropped (callers start the
#         string with a newline) and a "#document: URI" line is put in
#         front of the rest.
#   PARSER-OPTION...
#         Extra arguments passed through to sgml-parser unchanged.
#
# Sets the globals desc, src, out and URI and writes the files "output" and
# "expected" in the current directory. The comparison is reported through
# test_expect_success (presumably provided by ./libtest -- confirm there).
# Returns 1 without running anything when fewer than three arguments are given.
test_output_equals () {
	if [ "$#" -lt 3 ]; then
		printf '%s\n' 'usage: test_output_equals DESC SRC OUT [PARSER-OPTION...]' >&2
		return 1
	fi

	desc="$1"; shift
	src="$1"; shift
	out="$1"; shift

	# Derive the URI slug from the description: strip a leading "[...]" tag,
	# collapse punctuation/blank runs into "-", lower-case, and drop whatever
	# else is not alphanumeric or "-".
	# printf instead of echo: echo may interpret backslashes or options.
	URI="test:$(printf '%s\n' "$desc" | sed '
		s/^[ \t]*\[[^]]*\][ \t]*//;
		s/[:., \t][:., \t]*/-/g;
		s/_/-/g;
		# *cough*
		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
		s/[^a-zA-Z0-9-]//g;')"

	# "$@" keeps every extra parser option as exactly one argument.
	sgml-parser --uri "$URI" --src "$src" "$@" | sed 's/^ //' > output

	printf '%s\n' "#document: $URI" > expected
	# Fixtures contain literal backslash sequences such as \n; printf '%s'
	# writes them untouched, which echo does not guarantee under /bin/sh.
	printf '%s\n' "$out" | sed -n '2,$p' >> expected

	test_expect_success "$desc" 'cmp -b output expected'
}

################################################################
# Parse various SGML node types.

# NOTE(review): in the original file the first test case starts at this point:
#
#	test_output_equals \
#	'Parse a small document.' \
#	'
#
# with its multi-line source document continuing on the following lines.
# The fixture text that follows appears to be damaged (the SGML markup seems
# to be missing from the test sources) -- TODO restore the test cases from a
# pristine copy of this file before relying on them.

Hello World!

' \ ' element: html element: body element: p #text: Hello World!' test_output_equals \ 'Parse elements.' \ 'a' \ ' element: root element: child attribute: attr -> value element: child2 element: child3 #text: a' test_output_equals \ 'Check tag soup elements.' \ 'a' \ ' element: parent attribute: attr -> value element: child:1 element: child:2 #text: a' test_output_equals \ 'Parse an enclosed comment.' \ '' \ ' element: root #comment: Hello World! ' test_output_equals \ 'Parse comment combinations.' \ '' \ ' element: root #comment: s' \ ' #comment: ->s' test_output_equals \ 'Parse an enclosed CDATA section.' \ '...]]>' \ ' element: root #cdata-section: ...] ]>...' test_output_equals \ 'Parse non-enclosed CDATA section.' \ '' \ ' #cdata-section: ...' test_output_equals \ 'Parse a bad CDATA section.' \ '' \ ' element: root attribute: lang -> fr attribute: attr -> attribute: name -> value with &foo; " \ ' element: root attribute: a -> b attribute: c -> d attribute: g -> h attribute: i -> j attribute: k -> ' test_output_equals \ 'Parse attribute with non-quoted values.' \ '...' \ ' element: root attribute: color -> #abc attribute: path -> /to/%61-&\one";files #text: ...' test_output_equals \ 'Parse entity references.' \ '&-*' \ ' entity-reference: amp #text: - entity-reference: #42' # Just how these should be gracefully handled is not clear to me. test_output_equals \ 'Parse badly formatted entity references.' \ '& m33p;-&.:-copy;-&;-&#;-&#xx;' \ ' #text: & m33p; #text: - entity-reference: .:-copy #text: - #text: &; #text: - entity-reference: # #text: - entity-reference: #xx' test_output_equals \ 'Parse processing instructions.' \ ' ... ' \ ' proc-instruction: xml -> encoding="UTF8" attribute: encoding -> UTF8 #text: \n...\n proc-instruction: ecmascript -> var val=2;\n' test_output_equals \ 'Parse XML processing instructions.' 
\ '?>-' \ ' proc-instruction: xml -> version="1.0" /> attribute: version -> 1.0 proc-instruction: xml -> /' test_output_equals \ 'Parse XML stylesheet processing instructions.' \ '' \ ' proc-instruction: xml-stylesheet -> type="text/xsl" href="url" attribute: type -> text/xsl attribute: href -> url' test_output_equals \ 'Parse exotic processing instructions.' \ '+?>-?>---' \ ' proc-instruction: xml -> ?+>+ #text: -?>- proc-instruction: js -> #text: - proc-instruction: -> #text: -' test_output_equals \ 'Parse incorrect processing instructions.' \ '--- < #text: - proc-instruction: -> <=";& #text: -' test_output_equals \ 'Parse incorrect processing instructions (II).' \ ' >< / root >' \ ' element: root attribute: ns:attr -> value proc-instruction: target -> data' test_output_equals \ 'Check line numbers. (I)' \ ' number ' \ ' 1' \ --print-lines test_output_equals \ 'Check line numbers. (II)' \ '< line:2 line:3 = "line:5" >' \ ' 10' \ --print-lines test_output_equals \ 'Check line numbers. (III)' \ '1 2 3 4 5 6 7 8' \ ' 8' \ --print-lines test_done