From afb45aace5d88dda7c6922b57b6e0b899ef7879e Mon Sep 17 00:00:00 2001 From: Jonas Fonseca Date: Wed, 25 Jan 2006 18:18:01 +0100 Subject: [PATCH] Add support for scanning comment endings such as '--!>' correctly --- src/dom/sgml/scanner.c | 20 ++++++++++++++------ src/dom/test/test-sgml-parser-basic | 9 ++++++++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c index a372c9a59..af90a27b8 100644 --- a/src/dom/sgml/scanner.c +++ b/src/dom/sgml/scanner.c @@ -368,12 +368,20 @@ skip_sgml_comment(struct dom_scanner *scanner, unsigned char **string, /* It is always safe to access index -2 and -1 here since we * are supposed to have '<!--' before this is called. We do * however need to check that the '-->' are not overlapping any - * preceeding '-'. */ - if (pos[-2] == '-' && pos[-1] == '-' && &pos[-2] >= *string) { - length = pos - *string - 2; - *possibly_incomplete = 0; - pos++; - break; + * preceeding '-'. Additionally also handle the quirky '--!>' + * end sometimes found. */ + if (pos[-2] == '-') { + if (pos[-1] == '-' && &pos[-2] >= *string) { + length = pos - *string - 2; + *possibly_incomplete = 0; + pos++; + break; + } else if (pos[-1] == '!' && pos[-3] == '-' && &pos[-3] >= *string) { + length = pos - *string - 3; + *possibly_incomplete = 0; + pos++; + break; + } } } diff --git a/src/dom/test/test-sgml-parser-basic b/src/dom/test/test-sgml-parser-basic index a203a78ac..1a22b7fed 100755 --- a/src/dom/test/test-sgml-parser-basic +++ b/src/dom/test/test-sgml-parser-basic @@ -93,11 +93,18 @@ test_output_equals \ #text: u' test_output_equals \ -'Parse bad comment.' \ +'Parse bad comment. (I)' \ 's' \ ' #comment: ->s' +test_output_equals \ +'Parse bad comment. (II)' \ +'bad comment' \ +' +#comment: a +#text: bad comment' + test_output_equals \ 'Parse empty notation.' \ 's' \