From 95c1de23158baa807bb66026fc5687acbea5a4dc Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 03:35:36 +0100
Subject: [PATCH] Fix handling of incomplete processing instructions

When doing incremental rendering, we now require that the whole processing
instruction is available and that there is room for two tokens in the
scanner token table.  This is necessary because we have to generate both a
processing target token and a processing data token to make life simpler
for the parser.

Remove the processing instruction data case label from the main parser
loop.  This is safer because the code under that label already assumes that
the processing target token has been stored.
---
 src/dom/sgml/parser.c  |  1 -
 src/dom/sgml/scanner.c | 25 ++++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 22f35cac..bd9e6b9e 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -364,7 +364,6 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			assert(token->type == SGML_TOKEN_PROCESS_DATA);
 			/* Fall-through */
 
-		case SGML_TOKEN_PROCESS_DATA:
 			if (!add_sgml_proc_instruction(stack, &target, token))
 				return SGML_PARSER_CODE_MEM_ALLOC;
 			if ((target.type == SGML_TOKEN_PROCESS_XML
diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 19df8fce..e2e7c5db 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -543,6 +543,29 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 				possibly_incomplete = 0;
 			}
 
+			if (scanner->check_complete && scanner->incomplete) {
+				/* We need to fit both the process target token
+				 * and the process data token into the scanner
+				 * table. */
+				if (token + 1 >= scanner->table + DOM_SCANNER_TOKENS) {
+					possibly_incomplete = 1;
+
+				} else if (!possibly_incomplete) {
+					/* FIXME: We do this twice. */
+					for (pos = string + 1;
+					     (pos = skip_sgml_chars(scanner, pos, '>'));
+					     pos++) {
+						if (pos[-1] == '?')
+							break;
+					}
+					if (!pos)
+						possibly_incomplete = 1;
+				}
+
+				if (possibly_incomplete)
+					string = scanner->end;
+			}
+
 		} else if (*string == '/') {
 			string++;
 			skip_sgml_space(scanner, &string);
@@ -707,7 +730,7 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
 	/* The length can be empty for '<??>'. */
 	ssize_t length = -1;
 
-	token->string.string = string;
+	token->string.string = string++;
 
 	/* Figure out where the processing instruction ends. This doesn't use
 	 * skip_sgml() since we MUST ignore precedence here to allow '<' inside