From bd877570d27668d63b7d2e715015a469510624b7 Mon Sep 17 00:00:00 2001 From: Jonas Fonseca Date: Thu, 29 Dec 2005 21:52:27 +0100 Subject: [PATCH] Test some more obscure proc. instructions and fix some assertion failures --- src/dom/sgml/scanner.c | 21 ++++++++++++++++----- src/dom/test/test-sgml-parser-basic | 28 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c index 946c0bb6..31f77f40 100644 --- a/src/dom/sgml/scanner.c +++ b/src/dom/sgml/scanner.c @@ -385,13 +385,23 @@ static inline void scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token *token) { unsigned char *string = scanner->position; + size_t size; - token->string.string = string++; + token->string.string = string; - /* Figure out where the processing instruction ends */ - while (skip_sgml(scanner, &string, '>', 0)) - if (string[-2] == '?') + /* Figure out where the processing instruction ends. This doesn't use + * skip_sgml() since we MUST ignore precedence here to allow '<' inside + * the data part to be skipped correctly. */ + for (size = scanner->end - string; + size > 0 && (string = memchr(string, '>', size)); + string++) { + if (string[-1] == '?') { + string++; break; + } + } + + if (!string) string = scanner->end; token->type = SGML_TOKEN_PROCESS_DATA; token->string.length = string - token->string.string - 2; @@ -417,7 +427,8 @@ scan_sgml_tokens(struct dom_scanner *scanner) current < table_end && scanner->position < scanner->end; current++) { if (scanner->state == SGML_STATE_ELEMENT - || *scanner->position == '<') { + || (*scanner->position == '<' + && scanner->state != SGML_STATE_PROC_INST)) { scan_sgml(scanner, scanner->position, SGML_CHAR_WHITESPACE); if (scanner->position >= scanner->end) break; diff --git a/src/dom/test/test-sgml-parser-basic b/src/dom/test/test-sgml-parser-basic index d91709f5..ae0739c7 100755 --- a/src/dom/test/test-sgml-parser-basic +++ b/src/dom/test/test-sgml-parser-basic @@ -135,6 +135,34 @@ proc-instruction: xml -> encoding="UTF8" #text: \n...\n proc-instruction: ecmascript -> var val=2;\n' +test_output_equals \ +'Parse exotic processing instructions.' \ +'+?>-?>---' \ +' +proc-instruction: xml -> ?+>+ +#text: -?>- +proc-instruction: js -> +#text: - +proc-instruction: -> +#text: -' + +test_output_equals \ +'Parse incorrect processing instructions.' \ +'--- < +#text: - +proc-instruction: -> <=";& +#text: -' + +test_output_equals \ +'Parse incorrect processing instructions (II).' \ +' >