From a2376609e31b219b8e3a4effe8bb9c4456521699 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 00:50:06 +0100
Subject: [PATCH 01/20] Expand the testing of incremental parsing

There are still some bugs to resolve.
---
 src/dom/test/test-sgml-parser-incremental | 238 +++++++++++++++++++++-
 1 file changed, 227 insertions(+), 11 deletions(-)
diff --git a/src/dom/test/test-sgml-parser-incremental b/src/dom/test/test-sgml-parser-incremental
index a9896e52b..aa6ea64b1 100755
--- a/src/dom/test/test-sgml-parser-incremental
+++ b/src/dom/test/test-sgml-parser-incremental
@@ -11,9 +11,8 @@ parsing.
 
 . "$TEST_LIB"
 
-test_output_equals () {
+test_incremental_parsing () {
 	desc="$1"; shift
-	size="$1"; shift
 	src="$1"; shift
 	out="$1"; shift
 
@@ -25,25 +24,242 @@ test_output_equals () {
 		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
 		s/[^a-zA-Z0-9-]//g;')"
 
-	echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
-	| sed -e 's/^  //' | sed -n '$d;p' > output
 	echo "#document: $URI" > expected
 	echo "$out" | sed -n '2,$p' >> expected
 
-	test_expect_success "$desc" 'cmp output expected' 
+	for size in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do
+		echo -n "$src" | sgml-parser --uri "$URI" --stdin "$size" \
+		| sed -e 's/^  //' > output
+
+		test_run_ 'cmp output expected'
+		if [ "$?" != 0 -o "$eval_ret" != 0 ]
+		then
+			test_failure_ "$desc" "($size bytes)"
+			return
+		fi
+	done
+
+	test_ok_ "$desc"
 }
 
-for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
-	test_output_equals \
-	"Incrementally parse a small document reading $i bytes at a time." \
-	"$i" \
-	'<html><body><p>Hello World!</p></body></html>' \
+test_incremental_parsing \
+"Parse a small document." \
+'<html><body><p>Hello World!</p></body></html>' \
 	'
 element: html
   element: body
     element: p
       #text: Hello World!'
 
-done
+test_incremental_parsing \
+'Parse elements.' \
+'<root><child attr="value" /><child2></><child3 >a</></root>' \
+'
+element: root
+  element: child
+    attribute: attr -> value
+  element: child2
+  element: child3
+    #text: a'
+
+test_incremental_parsing \
+'Parse tag soup elements.' \
+'<parent attr="value" <child:1></><child:2</>a</parent>' \
+'
+element: parent
+  attribute: attr -> value
+  element: child:1
+  element: child:2
+  #text: a'
+
+test_incremental_parsing \
+'Parse an enclosed comment.' \
+'<root><!-- Hello World! --></root>' \
+'
+element: root
+  #comment:  Hello World! '
+
+test_incremental_parsing \
+'Parse comment combinations. (I)' \
+'<root><!-- <!-- -- > --><!--foo--><!----></root>' \
+'
+element: root
+  #comment:  <!-- -- > 
+  #comment: foo
+  #comment: '
+
+test_incremental_parsing \
+'Parse comment combinations. (II).' \
+'<! -- comment -->s<!-->-->t<!----->u' \
+'
+#comment:  comment 
+#text: s
+#comment: >
+#text: t
+#comment: -
+#text: u'
+
+test_incremental_parsing \
+'Parse bad comment. (I)' \
+'<!--->s' \
+'
+#comment: ->s'
+
+test_incremental_parsing \
+'Parse bad comment. (II)' \
+'<!--a--!>bad comment' \
+'
+#comment: a
+#text: bad comment'
+
+test_incremental_parsing \
+'Parse empty notation.' \
+'<!>s' \
+'
+#text: s'
+
+test_incremental_parsing \
+'Parse an enclosed CDATA section.' \
+'<root><![CDATA[...] ]>...]]></root>' \
+'
+element: root
+  #cdata-section: ...] ]>...'
+
+test_incremental_parsing \
+'Parse non-enclosed CDATA section.' \
+'<![CDATA[...]]>' \
+'
+#cdata-section: ...'
+
+test_incremental_parsing \
+'Parse a bad CDATA section.' \
+'<![CDATA[...' \
+'
+#cdata-section: ...'
+
+test_incremental_parsing \
+'Parse attributes.' \
+'<root lang="fr" attr name="value with &foo; <stuff"></root>' \
+'
+element: root
+  attribute: lang -> fr
+  attribute: attr -> 
+  attribute: name -> value with &foo; <stuff'
+
+test_incremental_parsing \
+'Parse attributes with garbage.' \
+"<root a=b c='d' e'f' g= h i = j k =></root>" \
+'
+element: root
+  attribute: a -> b
+  attribute: c -> d
+  attribute: g -> h
+  attribute: i -> j
+  attribute: k -> ' 
+
+test_incremental_parsing \
+'Parse attribute with non-quoted values.' \
+'<root color=#abc path=/to/%61-&\one";files/>...' \
+'
+element: root
+  attribute: color -> #abc
+  attribute: path -> /to/%61-&\one";files
+#text: ...'
+
+test_incremental_parsing \
+'Parse entity references.' \
+'&amp;-&#42;' \
+'
+entity-reference: amp
+#text: -
+entity-reference: #42'
+
+# Just how these should be gracefully handled is not clear to me.
+test_incremental_parsing \
+'Parse badly formatted entity references.' \
+'& m33p;-&.:-copy;-&;-&#;-&#xx;' \
+'
+#text: & m33p;
+#text: -
+entity-reference: .:-copy
+#text: -
+#text: &;
+#text: -
+entity-reference: #
+#text: -
+entity-reference: #xx'
+
+test_incremental_parsing \
+'Parse processing instructions.' \
+'<?xml encoding="UTF8"?>
+...
+<?ecmascript
+var val=2;
+?>' \
+'
+proc-instruction: xml -> encoding="UTF8"
+  attribute: encoding -> UTF8
+#text: \n...\n
+proc-instruction: ecmascript -> var val=2;\n'
+
+test_incremental_parsing \
+'Parse XML processing instructions.' \
+'<?xml version="1.0" />?><?xml />-' \
+'
+proc-instruction: xml -> version="1.0" />
+  attribute: version -> 1.0
+proc-instruction: xml -> />-'
+
+test_incremental_parsing \
+'Parse XML stylesheet processing instructions.' \
+'<?xml-stylesheet type="text/xsl" href="url"?>' \
+'
+proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
+  attribute: type -> text/xsl
+  attribute: href -> url'
+
+test_incremental_parsing \
+'Parse exotic processing instructions.' \
+'<?xml ?+>+?>-?>-<?js?>-<??>-' \
+'
+proc-instruction: xml -> ?+>+
+#text: -?>-
+proc-instruction: js -> 
+#text: -
+proc-instruction:  -> 
+#text: -'
+
+test_incremental_parsing \
+'Parse incorrect processing instructions.' \
+'<?js<?>-<?<??>-<?xml <=";&?>-<?' \
+'
+proc-instruction: js -> <
+#text: -
+proc-instruction:  -> <?
+#text: -
+proc-instruction: xml -> <=";&
+#text: -'
+
+test_incremental_parsing \
+'Parse incorrect processing instructions (II).' \
+'<?><?' \
+'
+proc-instruction:  -> ><?'
+
+test_incremental_parsing \
+'Skip spaces not inside text.' \
+'<
+root
+ns:attr                      
+=
+"value"
+><?	
+	target	
+ data?><	/	root	>' \
+'
+element: root
+  attribute: ns:attr -> value
+  proc-instruction: target -> data'
+
 
 test_done

From bccf5512d608b22c85acbc33342bfff02d10be65 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 00:56:48 +0100
Subject: [PATCH 02/20] Force an incomplete token for quoted attribute values
 when there's no end

---
 src/dom/sgml/scanner.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index f4a6578ac..13a517ee2 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -641,6 +641,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			/* We found the end. */
 			possibly_incomplete = 0;
 
+		} else if (scanner->check_complete && scanner->incomplete) {
+			/* Force an incomplete token. */
+			string = scanner->end;
+
 		} else if (is_sgml_attribute(*string)) {
 			token->string.string++;
 			scan_sgml_attribute(scanner, string);

From 74728cab059572ea897912f3ffe695b480407ed0 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:00:28 +0100
Subject: [PATCH 03/20] Also set the node subtype for <?xml-stylesheet?>

---
 src/dom/node.h        | 5 ++---
 src/dom/sgml/parser.c | 4 ++++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/dom/node.h b/src/dom/node.h
index 9f4d2cfbd..f7de061b8 100644
--- a/src/dom/node.h
+++ b/src/dom/node.h
@@ -151,9 +151,8 @@ enum dom_proc_instruction_type {
 	DOM_PROC_INSTRUCTION,
 
 	/* Keep this group sorted */
-	DOM_PROC_INSTRUCTION_DBHTML,	/* DocBook toolchain instruction */
-	DOM_PROC_INSTRUCTION_ELINKS,	/* Internal instruction hook */
-	DOM_PROC_INSTRUCTION_XML,	/* XML instructions */
+	DOM_PROC_INSTRUCTION_XML,		/* XML header */
+	DOM_PROC_INSTRUCTION_XML_STYLESHEET,	/* XML stylesheet link */
 
 	DOM_PROC_INSTRUCTION_TYPES
 };
diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index bda43c5c5..c8511d56a 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -117,6 +117,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar
 		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML;
 		break;
 
+	case SGML_TOKEN_PROCESS_XML_STYLESHEET:
+		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML_STYLESHEET;
+		break;
+
 	case SGML_TOKEN_PROCESS:
 	default:
 		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION;

From 1e104afbbab980e83d4f1d7cb6481100265ef85a Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:05:42 +0100
Subject: [PATCH 04/20] Improve error checking when adding nodes

Fail with SGML_PARSER_CODE_MEM_ALLOC.
---
 src/dom/sgml/parser.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index c8511d56a..03f3f0c10 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -74,7 +74,7 @@ add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
 }
 
 
-static inline void
+static inline struct dom_node *
 add_sgml_attribute(struct dom_stack *stack,
 		   struct dom_scanner_token *token, struct dom_scanner_token *valtoken)
 {
@@ -96,9 +96,11 @@ add_sgml_attribute(struct dom_stack *stack,
 		node->data.attribute.quoted = 1;
 
 	if (!node || push_dom_node(stack, node) != DOM_STACK_CODE_OK)
-		return;
+		return NULL;
 
 	pop_dom_node(stack);
+
+	return node;
 }
 
 static inline struct dom_node *
@@ -132,19 +134,21 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar
 	return NULL;
 }
 
-static inline void
+static inline struct dom_node *
 add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token)
 {
 	struct dom_node *parent = get_dom_stack_top(stack)->node;
 	struct dom_node *node = add_dom_node(parent, type, &token->string);
 
-	if (!node) return;
+	if (!node) return NULL;
 
 	if (token->type == SGML_TOKEN_SPACE)
 		node->data.text.only_space = 1;
 
 	if (push_dom_node(stack, node) == DOM_STACK_CODE_OK)
 		pop_dom_node(stack);
+
+	return node;
 }
 
 
@@ -214,7 +218,8 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 				token = NULL;
 			}
 
-			add_sgml_attribute(stack, &name, token);
+			if (!add_sgml_attribute(stack, &name, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 
 			/* Skip the value token */
 			if (token)
@@ -254,15 +259,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 		switch (token->type) {
 		case SGML_TOKEN_ELEMENT:
 		case SGML_TOKEN_ELEMENT_BEGIN:
-			if (!add_sgml_element(stack, token)) {
-				if (token->type == SGML_TOKEN_ELEMENT) {
-					skip_dom_scanner_token(scanner);
-					break;
-				}
-
-				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
-				break;
-			}
+			if (!add_sgml_element(stack, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 
 			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
 				enum sgml_parser_code code;
@@ -305,7 +303,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			break;
 
 		case SGML_TOKEN_NOTATION_COMMENT:
-			add_sgml_node(stack, DOM_NODE_COMMENT, token);
+			if (!add_sgml_node(stack, DOM_NODE_COMMENT, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 			skip_dom_scanner_token(scanner);
 			break;
 
@@ -318,7 +317,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			break;
 
 		case SGML_TOKEN_CDATA_SECTION:
-			add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
+			if (!add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 			skip_dom_scanner_token(scanner);
 			break;
 
@@ -339,9 +339,10 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			/* Fall-through */
 
 		case SGML_TOKEN_PROCESS_DATA:
-			if (add_sgml_proc_instruction(stack, &target, token)
-			    && (target.type == SGML_TOKEN_PROCESS_XML
-			        || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
+			if (!add_sgml_proc_instruction(stack, &target, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
+			if ((target.type == SGML_TOKEN_PROCESS_XML
+			     || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
 			    && token->string.length > 0) {
 				/* Parse the <?xml data="attributes"?>. */
 				struct dom_scanner attr_scanner;

From 9e7b0d4fa31da8d1dbc0c468928312212439afc1 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:09:05 +0100
Subject: [PATCH 05/20] Remove assertion logic from parse_sgml_attributes()

They are getting out of hand and making it hard to use the function in
'unusual' situations (like when resuming parsing inside elements).
---
 src/dom/sgml/parser.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 03f3f0c10..598a4433f 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -170,13 +170,6 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 {
 	struct dom_scanner_token name;
 
-	assert(dom_scanner_has_tokens(scanner)
-	       && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
-	           || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION)));
-
-	if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN)
-		skip_dom_scanner_token(scanner);
-
 	while (dom_scanner_has_tokens(scanner)) {
 		struct dom_scanner_token *token = get_dom_scanner_token(scanner);
 
@@ -265,6 +258,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
 				enum sgml_parser_code code;
 
+				skip_dom_scanner_token(scanner);
+
 				code = parse_sgml_attributes(stack, scanner);
 				if (code != SGML_PARSER_CODE_OK)
 					return code;

From c6e83d1d9c4ee844a0727727f7152362e0a6635a Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:12:03 +0100
Subject: [PATCH 06/20] Assert parsing depth >= parser stack depth

As the comment says, popping parsing nodes during incremental parsing
might trigger the assertion.
---
 src/dom/sgml/parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 598a4433f..1ba159701 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -522,7 +522,7 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
 		}
 		/* It's bigger than when calling done_sgml_parser() in the middle of an
 		 * incomplete parsing. */
-		assert(parsing->depth == parser->stack.depth);
+		assert(parsing->depth >= parser->stack.depth);	
 	}
 
 	done_dom_string(&parsing->incomplete);

From 4ab1dde8741c43de7af317231a126cc7e1a6146e Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:21:02 +0100
Subject: [PATCH 07/20] Preserve the scanner state when it is not the 'default'
 state

This is necessary to make it possible to resume parsing of element
attributes. Allows the incomplete string in the parsing state struct to
be unset.
---
 src/dom/sgml/parser.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 1ba159701..82158438f 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -452,15 +452,19 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 		struct sgml_parsing_state *parent = &parsing[-1];
 
 		if (parent->resume) {
-			assert(is_dom_string_set(&parent->incomplete));
+			if (is_dom_string_set(&parent->incomplete)) {
 
-			if (!add_to_dom_string(&parent->incomplete,
-					       string->string, string->length)) {
-				parser->code = SGML_PARSER_CODE_MEM_ALLOC;
-				return DOM_STACK_CODE_OK;
+				if (!add_to_dom_string(&parent->incomplete,
+						       string->string,
+						       string->length)) {
+
+					parser->code = SGML_PARSER_CODE_MEM_ALLOC;
+					return DOM_STACK_CODE_OK;
+				}
+
+				string = &parent->incomplete;
 			}
 
-			string = &parent->incomplete;
 			scanner_state = parent->scanner.state;
 
 			/* Pop down to the parent. */
@@ -474,19 +478,31 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 			 scanner_state, count_lines, complete, incremental,
 			 detect_errors);
 
-	{
-		int immutable = get_dom_stack_top(&parser->stack)->immutable;
-
-		get_dom_stack_top(&parser->stack)->immutable = 1;
+	if (scanner_state == SGML_STATE_ELEMENT) {
+		parser->code = parse_sgml_attributes(&parser->stack, &parsing->scanner);
+		if (parser->code == SGML_PARSER_CODE_OK)
+			parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
+	} else {
 		parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
-		get_dom_stack_top(&parser->stack)->immutable = !!immutable;
 	}
 
-	if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) {
+	if (complete) {
 		pop_dom_node(&parser->parsing);
 		return DOM_STACK_CODE_OK;
 	}
 
+	if (parser->code != SGML_PARSER_CODE_INCOMPLETE) {
+		/* No need to preserve the default scanner state. */
+		if (parsing->scanner.state == SGML_STATE_TEXT) {
+			pop_dom_node(&parser->parsing);
+			return DOM_STACK_CODE_OK;
+		}
+
+		done_dom_string(&parsing->incomplete);
+		parsing->resume = 1;
+		return DOM_STACK_CODE_OK;
+	}
+
 	token = get_dom_scanner_token(&parsing->scanner);
 	assert(token && token->type == SGML_TOKEN_INCOMPLETE);
 

From e5e06764c4345b279bde1d6f44749ddffb90740c Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 01:40:56 +0100
Subject: [PATCH 08/20] Improve checks for incompleteness when parsing
 attributes

Check whether there are '=' and value tokens before handling them. If there
is any doubt, the whole attribute structure is 'pushed back' into the
stream. That way incremental parsing will not add the value as a new
attribute because the name token was handled in the previous parsing run.
---
 src/dom/sgml/parser.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 82158438f..22f35cac9 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -165,6 +165,37 @@ call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *toke
 	return parser->error_func(parser, &token->string, line);
 }
 
+/* Appends to or 'creates' an incomplete token. This can be used to
+ * force tokens back into the 'stream' if they require that later tokens
+ * are available.
+ *
+ * NOTE: You can only do this for tokens that are not stripped of markup such
+ * as identifiers. */
+static enum sgml_parser_code
+check_sgml_incomplete(struct dom_scanner *scanner,
+		      struct dom_scanner_token *start,
+		      struct dom_scanner_token *token)
+{
+	if (token && token->type == SGML_TOKEN_INCOMPLETE) {
+		token->string.length += token->string.string - start->string.string;
+		token->string.string = start->string.string;
+		return 1;
+
+	} else if (!token && scanner->check_complete && scanner->incomplete) {
+		size_t left = scanner->end - start->string.string;
+
+		assert(left > 0);
+
+		token = scanner->current = scanner->table;
+		scanner->tokens = 1;
+		token->type = SGML_TOKEN_INCOMPLETE;
+		set_dom_string(&token->string, start->string.string, left);
+		return 1;
+	}
+
+	return 0;
+}
+
 static inline enum sgml_parser_code
 parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 {
@@ -195,7 +226,7 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 				/* If the token is not a valid value token
 				 * ignore it. */
 				token = get_next_dom_scanner_token(scanner);
-				if (token && token->type == SGML_TOKEN_INCOMPLETE)
+				if (check_sgml_incomplete(scanner, &name, token))
 					return SGML_PARSER_CODE_INCOMPLETE;
 
 				if (token
@@ -204,7 +235,7 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 				    && token->type != SGML_TOKEN_STRING)
 					token = NULL;
 
-			} else if (token && token->type == SGML_TOKEN_INCOMPLETE) {
+			} else if (check_sgml_incomplete(scanner, &name, token)) {
 				return SGML_PARSER_CODE_INCOMPLETE;
 
 			} else {

From bae3b581464a38281e44a6434d7f24580bd8e633 Mon Sep 17 00:00:00 2001
From: Laurent MONIN <zas@norz.org>
Date: Sat, 28 Jan 2006 01:58:00 +0100
Subject: [PATCH 09/20] Fix a potential memleak.

---
 src/document/html/parser/general.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/document/html/parser/general.c b/src/document/html/parser/general.c
index 91588e870..af2e8d553 100644
--- a/src/document/html/parser/general.c
+++ b/src/document/html/parser/general.c
@@ -364,7 +364,10 @@ imported:
 
 		ecmascript_eval(interpreter, &code, &ret);
 		done_string(&code);
-		if (!ret.length) return;
+		if (!ret.length) {
+			done_string(&ret);
+			return;
+		}
 
 		/* FIXME: it doesn't work */
 		html_top->invisible = 0;

From 5114c9d1104501c7ae9b1e00e22a406a339d8156 Mon Sep 17 00:00:00 2001
From: Laurent MONIN <zas@norz.org>
Date: Sat, 28 Jan 2006 01:59:22 +0100
Subject: [PATCH 10/20] Trim trailing whitespaces.

---
 src/document/dom/renderer.c | 2 +-
 src/dom/configuration.h     | 4 ++--
 src/protocol/fsp/fsp.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/document/dom/renderer.c b/src/document/dom/renderer.c
index 131f30eb6..451e21fb0 100644
--- a/src/document/dom/renderer.c
+++ b/src/document/dom/renderer.c
@@ -1024,7 +1024,7 @@ render_dom_document(struct cache_entry *cached, struct document *document,
 	} else if (renderer.doctype == SGML_DOCTYPE_RSS) {
 		add_dom_stack_context(&parser->stack, &renderer,
 				      &dom_rss_renderer_context_info);
-		add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); 
+		add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS);
 	}
 
 	/* FIXME: When rendering this way we don't really care about the code.
diff --git a/src/dom/configuration.h b/src/dom/configuration.h
index f323c386f..ad0bdb319 100644
--- a/src/dom/configuration.h
+++ b/src/dom/configuration.h
@@ -7,7 +7,7 @@ struct dom_stack;
 /* API Doc :: dom-config */
 
 /** DOM Configuration
- * 
+ *
  * The DOMConfiguration interface represents the configuration of a document.
  * Using the configuration, it is possible to change the behaviour of how
  * document normalization is done, such as replacing the CDATASection nodes
@@ -33,7 +33,7 @@ enum dom_config_flag {
 	DOM_CONFIG_COMMENTS = 2,
 
 	/** "element-content-whitespace"
-	 * 
+	 *
 	 * The default is true and will keep all whitespaces in the document.
 	 * When false, discard all Text nodes that contain only whitespaces. */
 	DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE = 4,
diff --git a/src/protocol/fsp/fsp.c b/src/protocol/fsp/fsp.c
index d343d8e11..4696657ce 100644
--- a/src/protocol/fsp/fsp.c
+++ b/src/protocol/fsp/fsp.c
@@ -296,7 +296,7 @@ end:
 		abort_connection(conn, S_OUT_OF_MEM);
 		return;
 	}
-	read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data); 
+	read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data);
 }
 
 #undef READ_SIZE

From d92a074e40175834721592a94e2127f909717f67 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 03:21:27 +0100
Subject: [PATCH 11/20] Fix parsing of '<a< b>' where the scanner didn't rewind
 to the proper place

Add test for this tag soup combo.
---
 src/dom/sgml/scanner.c              |  4 ++--
 src/dom/test/test-sgml-parser-basic | 10 +++++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 13a517ee2..656fad20e 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -449,13 +449,13 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 		if (scanner->state == SGML_STATE_ELEMENT) {
 			/* Already inside an element so insert a tag end token
 			 * and continue scanning in next iteration. */
-			string--;
-			real_length = 0;
 			type = SGML_TOKEN_TAG_END;
 			scanner_state = SGML_STATE_TEXT;
 
 			/* We are creating a 'virtual' that has no source. */
 			possibly_incomplete = 0;
+			string = token->string.string;
+			real_length = 0;
 
 		} else if (is_sgml_ident(*string)) {
 			token->string.string = string;
diff --git a/src/dom/test/test-sgml-parser-basic b/src/dom/test/test-sgml-parser-basic
index 1a22b7fed..f19f158eb 100755
--- a/src/dom/test/test-sgml-parser-basic
+++ b/src/dom/test/test-sgml-parser-basic
@@ -56,7 +56,7 @@ element: root
     #text: a'
 
 test_output_equals \
-'Parse tag soup elements.' \
+'Parse tag soup elements. (I)' \
 '<parent attr="value" <child:1></><child:2</>a</parent>' \
 '
 element: parent
@@ -65,6 +65,14 @@ element: parent
   element: child:2
   #text: a'
 
+test_output_equals \
+'Parse tag soup elements. (II)' \
+'< a >< b < c / >< / >' \
+'
+element: a
+  element: b
+    element: c'
+
 test_output_equals \
 'Parse an enclosed comment.' \
 '<root><!-- Hello World! --></root>' \

From 00c4e0bfa22b5555e4eb6bdd1e0ff9d1d934f5a3 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 03:23:06 +0100
Subject: [PATCH 12/20] Do not attempt to read *string when string ==
 scanner->end

There might be other places that need to be reviewed for this.
---
 src/dom/sgml/scanner.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 656fad20e..359a095ff 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -457,6 +457,9 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			string = token->string.string;
 			real_length = 0;
 
+		} else if (string == scanner->end) {
+			/* It is incomplete. */
+
 		} else if (is_sgml_ident(*string)) {
 			token->string.string = string;
 			scan_sgml(scanner, string, SGML_CHAR_IDENT);

From 823c5945241feccce636bc93b0260f25f3210b58 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 03:24:16 +0100
Subject: [PATCH 13/20] Use ssize_t instead of size_t for length since it must
 carry a signed value

---
 src/dom/sgml/scanner.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 359a095ff..19df8fcea 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -705,7 +705,7 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
 {
 	unsigned char *string = scanner->position;
 	/* The length can be empty for '<??>'. */
-	size_t length = -1;
+	ssize_t length = -1;
 
 	token->string.string = string;
 

From 95c1de23158baa807bb66026fc5687acbea5a4dc Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 03:35:36 +0100
Subject: [PATCH 14/20] Fix handling of incomplete processing instructions

When doing incremental rendering we now require the whole thing to be there
and that there is room for two tokens in the scanner token table.  This is
necessary because we have to generate both a processing target token and a
processing data token to make life simpler for the parser.

Remove processing instruction data case label from the main parser loop. It
is safer this way since it already assumes that the processing target token
has been stored.
---
 src/dom/sgml/parser.c  |  1 -
 src/dom/sgml/scanner.c | 25 ++++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 22f35cac9..bd9e6b9ee 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -364,7 +364,6 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			assert(token->type == SGML_TOKEN_PROCESS_DATA);
 			/* Fall-through */
 
-		case SGML_TOKEN_PROCESS_DATA:
 			if (!add_sgml_proc_instruction(stack, &target, token))
 				return SGML_PARSER_CODE_MEM_ALLOC;
 			if ((target.type == SGML_TOKEN_PROCESS_XML
diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 19df8fcea..e2e7c5db5 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -543,6 +543,29 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 				possibly_incomplete = 0;
 			}
 
+			if (scanner->check_complete && scanner->incomplete) {
+				/* We need to fit both the process target token
+				 * and the process data token into the scanner
+				 * table. */
+				if (token + 1 >= scanner->table + DOM_SCANNER_TOKENS) {
+					possibly_incomplete = 1;
+
+				} else if (!possibly_incomplete) {
+					/* FIXME: We do this twice. */
+					for (pos = string + 1;
+					     (pos = skip_sgml_chars(scanner, pos, '>'));
+					     pos++) {
+						if (pos[-1] == '?')
+							break;
+					}
+					if (!pos)
+						possibly_incomplete = 1;
+				}
+
+				if (possibly_incomplete)
+					string = scanner->end;
+			}
+
 		} else if (*string == '/') {
 			string++;
 			skip_sgml_space(scanner, &string);
@@ -707,7 +730,7 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
 	/* The length can be empty for '<??>'. */
 	ssize_t length = -1;
 
-	token->string.string = string;
+	token->string.string = string++;
 
 	/* Figure out where the processing instruction ends. This doesn't use
 	 * skip_sgml() since we MUST ignore precedence here to allow '<' inside

From 495fb2805be29e4bd8249144c9fbe51d7fc7df1a Mon Sep 17 00:00:00 2001
From: Miciah Dashiel Butler Masters <miciah@myrealbox.com>
Date: Sat, 28 Jan 2006 03:23:17 +0000
Subject: [PATCH 15/20] SMJS: Give user scripts access to the view_state

Introduce the view_state object to ECMAScript with properties .uri and
.plain and pass the current view_state to preformat hooks.
---
 contrib/smjs/google_video.js           |   2 +-
 contrib/smjs/hooks.js                  |   8 +-
 src/scripting/smjs/Makefile            |   2 +-
 src/scripting/smjs/hooks.c             |  16 +++-
 src/scripting/smjs/view_state_object.c | 109 +++++++++++++++++++++++++
 src/scripting/smjs/view_state_object.h |   9 ++
 6 files changed, 137 insertions(+), 9 deletions(-)
 create mode 100644 src/scripting/smjs/view_state_object.c
 create mode 100644 src/scripting/smjs/view_state_object.h

diff --git a/contrib/smjs/google_video.js b/contrib/smjs/google_video.js
index f267941f1..46b5ee192 100644
--- a/contrib/smjs/google_video.js
+++ b/contrib/smjs/google_video.js
@@ -1,7 +1,7 @@
 /* Play videos at video.google.com with minimal niggling. Just follow the link
  * from the front page or the search page, and the video will automatically
  * be loaded. */
-function load_google_video(cached) {
+function load_google_video(cached, vs) {
 	if (!cached.uri.match(/^http:\/\/video.google.com\/videoplay/))
 		return true;
 
diff --git a/contrib/smjs/hooks.js b/contrib/smjs/hooks.js
index 875904d32..d4516c715 100644
--- a/contrib/smjs/hooks.js
+++ b/contrib/smjs/hooks.js
@@ -8,9 +8,9 @@ elinks.keymaps.main["@"] = function () {
 };
 
 elinks.preformat_html_hooks = new Array();
-elinks.preformat_html = function (cached) {
+elinks.preformat_html = function (cached, vs) {
 	for (var i in elinks.preformat_html_hooks)
-		if (!elinks.preformat_html_hooks[i](cached))
+		if (!elinks.preformat_html_hooks[i](cached, vs))
 			return false;
 
 	return true;
@@ -36,13 +36,13 @@ elinks.follow_url_hook = function (url) {
 	return url;
 };
 
-function root_w00t(cached) {
+function root_w00t(cached, vs) {
 	cached.content = cached.content.replace(/root/g, "w00t");
 	return true;
 };
 elinks.preformat_html_hooks.push(root_w00t);
 
-function mangle_deb_bugnumbers(cached) {
+function mangle_deb_bugnumbers(cached, vs) {
 	if (!cached.uri.match(/^[a-z0-9]+:\/\/[a-z0-9A-Z.-]+debian\.org/)
 	    && !cached.uri.match(/changelog\.Debian/))
 		return true;
diff --git a/src/scripting/smjs/Makefile b/src/scripting/smjs/Makefile
index 7ff12a144..1689f4789 100644
--- a/src/scripting/smjs/Makefile
+++ b/src/scripting/smjs/Makefile
@@ -4,6 +4,6 @@ include $(top_builddir)/Makefile.config
 INCLUDES += $(SPIDERMONKEY_CFLAGS)
 
 OBJS = smjs.o core.o global_object.o hooks.o elinks_object.o cache_object.o \
-       bookmarks.o keybinding.o
+       view_state_object.o bookmarks.o keybinding.o
 
 include $(top_srcdir)/Makefile.lib
diff --git a/src/scripting/smjs/hooks.c b/src/scripting/smjs/hooks.c
index 91a768a41..03097ef01 100644
--- a/src/scripting/smjs/hooks.c
+++ b/src/scripting/smjs/hooks.c
@@ -12,10 +12,13 @@
 #include "main/event.h"
 #include "main/module.h"
 #include "scripting/smjs/cache_object.h"
+#include "scripting/smjs/view_state_object.h"
 #include "scripting/smjs/core.h"
 #include "scripting/smjs/elinks_object.h"
 #include "scripting/smjs/hooks.h"
+#include "session/location.h"
 #include "session/session.h"
+#include "viewer/text/vs.h"
 
 
 static enum evhook_status
@@ -88,8 +91,8 @@ script_hook_pre_format_html(va_list ap, void *data)
 	struct session *ses = va_arg(ap, struct session *);
 	struct cache_entry *cached = va_arg(ap, struct cache_entry *);
 	enum evhook_status ret = EVENT_HOOK_STATUS_NEXT;
-	JSObject *cache_entry_object;
-	jsval args[1], rval;
+	JSObject *cache_entry_object, *view_state_object = NULL;
+	jsval args[2], rval;
 
 	evhook_use_params(ses && cached);
 
@@ -97,13 +100,20 @@ script_hook_pre_format_html(va_list ap, void *data)
 
 	smjs_ses = ses;
 
+	if (have_location(ses)) {
+		struct view_state *vs = &cur_loc(ses)->vs;
+
+		view_state_object = smjs_get_view_state_object(vs);
+	}
+
 	cache_entry_object = smjs_get_cache_entry_object(cached);
 	if (!cache_entry_object) goto end;
 
 	args[0] = OBJECT_TO_JSVAL(cache_entry_object);
+	args[1] = OBJECT_TO_JSVAL(view_state_object);
 
 	if (JS_TRUE == smjs_invoke_elinks_object_method("preformat_html",
-	                                                args, 1, &rval))
+	                                                args, 2, &rval))
 		if (JS_FALSE == JSVAL_TO_BOOLEAN(rval))
 			ret = EVENT_HOOK_STATUS_LAST;
 
diff --git a/src/scripting/smjs/view_state_object.c b/src/scripting/smjs/view_state_object.c
new file mode 100644
index 000000000..a40457e76
--- /dev/null
+++ b/src/scripting/smjs/view_state_object.c
@@ -0,0 +1,109 @@
+/* Exports struct view_state to the world of ECMAScript */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "elinks.h"
+
+#include "ecmascript/spidermonkey/util.h"
+#include "protocol/uri.h"
+#include "scripting/smjs/view_state_object.h"
+#include "scripting/smjs/core.h"
+#include "util/error.h"
+#include "util/memory.h"
+#include "viewer/text/vs.h"
+
+enum view_state_prop {
+	VIEW_STATE_PLAIN,
+	VIEW_STATE_URI,
+};
+
+static const JSPropertySpec view_state_props[] = {
+	{ "plain", VIEW_STATE_PLAIN, JSPROP_ENUMERATE },
+	{ "uri",   VIEW_STATE_URI,   JSPROP_ENUMERATE | JSPROP_READONLY },
+	{ NULL }
+};
+
+static JSBool
+view_state_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
+{
+	struct view_state *vs = JS_GetPrivate(ctx, obj);
+
+	undef_to_jsval(ctx, vp);
+
+	if (!JSVAL_IS_INT(id))
+		return JS_FALSE;
+
+	switch (JSVAL_TO_INT(id)) {
+	case VIEW_STATE_PLAIN:
+		*vp = INT_TO_JSVAL(vs->plain);
+
+		return JS_TRUE;
+	case VIEW_STATE_URI:
+		*vp = STRING_TO_JSVAL(JS_NewStringCopyZ(smjs_ctx,
+		                                        struri(vs->uri)));
+
+		return JS_TRUE;
+	default:
+		INTERNAL("Invalid ID %d in view_state_get_property().",
+		         JSVAL_TO_INT(id));
+	}
+
+	return JS_FALSE;
+}
+
+static JSBool
+view_state_set_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
+{
+	struct view_state *vs = JS_GetPrivate(ctx, obj);
+
+	if (!JSVAL_IS_INT(id))
+		return JS_FALSE;
+
+	switch (JSVAL_TO_INT(id)) {
+	case VIEW_STATE_PLAIN: {
+		vs->plain = atol(jsval_to_string(ctx, vp));
+
+		return JS_TRUE;
+	}
+	default:
+		INTERNAL("Invalid ID %d in view_state_set_property().",
+		         JSVAL_TO_INT(id));
+	}
+
+	return JS_FALSE;
+}
+
+static const JSClass view_state_class = {
+	"view_state",
+	JSCLASS_HAS_PRIVATE,
+	JS_PropertyStub, JS_PropertyStub,
+	view_state_get_property, view_state_set_property,
+	JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, JS_FinalizeStub
+};
+
+JSObject *
+smjs_get_view_state_object(struct view_state *vs)
+{
+	JSObject *view_state_object;
+
+	assert(smjs_ctx);
+
+	view_state_object = JS_NewObject(smjs_ctx,
+	                                  (JSClass *) &view_state_class,
+	                                  NULL, NULL);
+
+	if (!view_state_object) return NULL;
+
+	if (JS_FALSE == JS_SetPrivate(smjs_ctx, view_state_object, vs))
+		return NULL;
+
+	if (JS_FALSE == JS_DefineProperties(smjs_ctx, view_state_object,
+	                               (JSPropertySpec *) view_state_props))
+		return NULL;
+
+	return view_state_object;
+}
diff --git a/src/scripting/smjs/view_state_object.h b/src/scripting/smjs/view_state_object.h
new file mode 100644
index 000000000..daf770eb6
--- /dev/null
+++ b/src/scripting/smjs/view_state_object.h
@@ -0,0 +1,9 @@
+#ifndef EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H
+#define EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H
+
+struct view_state;
+
+JSObject *smjs_get_view_state_object(struct view_state *vs);
+
+#endif
+

From 2e9d433402d44eafa1630e10760b2635a5d6072b Mon Sep 17 00:00:00 2001
From: Miciah Dashiel Butler Masters <miciah@myrealbox.com>
Date: Sat, 28 Jan 2006 03:27:41 +0000
Subject: [PATCH 16/20] SMJS: mangle_deb_bugnumbers: add <pre>; set rendered
 view

Use the newly available view_state object to change to rendered view
and wrap everything in <pre> ... </pre> if the document MIME type is
text/plain.
---
 contrib/smjs/hooks.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/contrib/smjs/hooks.js b/contrib/smjs/hooks.js
index d4516c715..0278e46f2 100644
--- a/contrib/smjs/hooks.js
+++ b/contrib/smjs/hooks.js
@@ -55,7 +55,14 @@ function mangle_deb_bugnumbers(cached, vs) {
 	/* Debian Policy Manual 4.4 footnote 16 */
 	var closes_re = /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/gi;
 
-	cached.content = cached.content.replace(closes_re, rewrite_closes_fn);
+	var new_content = cached.content.replace(closes_re, rewrite_closes_fn);
+	if (vs && cached.content_type == 'text/plain') {
+		cached.content = '<pre>' + new_content + '</pre>';
+		vs.plain = "0";
+	} else {
+		cached.content = new_content;
+	}
+
 
 	return true;
 }

From 24a9d103b4ee5ede577297bf49db93b83f8860a0 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 04:09:31 +0100
Subject: [PATCH 17/20] DOM: Add allocated flag to struct dom_node; replaces
 subtype flags

Prepare for handling of allocated strings in the various nodes.
---
 src/dom/configuration.c | 10 +++++-----
 src/dom/node.c          | 17 ++++++++++++++---
 src/dom/node.h          |  9 +++------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/dom/configuration.c b/src/dom/configuration.c
index fb8f8c921..b228f02e8 100644
--- a/src/dom/configuration.c
+++ b/src/dom/configuration.c
@@ -44,11 +44,11 @@ normalize_text_node_whitespace(struct dom_node *node)
 		}
 	}
 
-	if (node->data.text.allocated)
+	if (node->allocated)
 		done_dom_string(&node->string);
 
 	set_dom_string(&node->string, string.string, string.length);
-	node->data.text.allocated = 1;
+	node->allocated = 1;
 
 	return DOM_STACK_CODE_OK;
 
@@ -74,14 +74,14 @@ append_node_text(struct dom_config *config, struct dom_node *node)
 		set_dom_string(&dest, NULL, 0);
 
 	} else {
-		if (prev->data.text.allocated) {
+		if (prev->allocated) {
 			copy_struct(&dest, &prev->string);
 		} else {
 			set_dom_string(&dest, NULL, 0);
 			if (!add_to_dom_string(&dest, prev->string.string, prev->string.length))
 				return DOM_STACK_CODE_ERROR_MEM_ALLOC;
 			set_dom_string(&prev->string, dest.string, dest.length);
-			prev->data.text.allocated = 1;
+			prev->allocated = 1;
 		}
 	}
 
@@ -135,7 +135,7 @@ append_node_text(struct dom_config *config, struct dom_node *node)
 
 		node->type = DOM_NODE_TEXT;
 		memset(&node->data, 0, sizeof(node->data));
-		node->data.text.allocated = 1;
+		node->allocated = 1;
 		copy_struct(&node->string, &dest);
 
 		if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
diff --git a/src/dom/node.c b/src/dom/node.c
index 9995c2230..913397aee 100644
--- a/src/dom/node.c
+++ b/src/dom/node.c
@@ -202,7 +202,7 @@ struct dom_node *
 get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type,
 		       uint16_t subtype, struct dom_string *name)
 {
-	struct dom_node node = { type, INIT_DOM_STRING(name->string, name->length) };
+	struct dom_node node = { type, 0, INIT_DOM_STRING(name->string, name->length) };
 	struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, list);
 
 	if (subtype) {
@@ -359,8 +359,10 @@ done_dom_node_data(struct dom_node *node)
 
 	switch (node->type) {
 	case DOM_NODE_ATTRIBUTE:
-		if (data->attribute.allocated)
+		if (node->allocated) {
 			done_dom_string(&node->string);
+			done_dom_string(&data->attribute.value);
+		}
 		break;
 
 	case DOM_NODE_DOCUMENT:
@@ -380,16 +382,25 @@ done_dom_node_data(struct dom_node *node)
 
 		if (data->element.map)
 			done_dom_node_list(data->element.map);
+
+		if (node->allocated)
+			done_dom_string(&node->string);
 		break;
 
 	case DOM_NODE_TEXT:
-		if (data->text.allocated)
+	case DOM_NODE_CDATA_SECTION:
+	case DOM_NODE_ENTITY_REFERENCE:
+		if (node->allocated)
 			done_dom_string(&node->string);
 		break;
 
 	case DOM_NODE_PROCESSING_INSTRUCTION:
 		if (data->proc_instruction.map)
 			done_dom_node_list(data->proc_instruction.map);
+		if (node->allocated) {
+			done_dom_string(&node->string);
+			done_dom_string(&data->proc_instruction.instruction);
+		}
 		break;
 
 	default:
diff --git a/src/dom/node.h b/src/dom/node.h
index f7de061b8..f1816dce6 100644
--- a/src/dom/node.h
+++ b/src/dom/node.h
@@ -115,9 +115,6 @@ struct dom_attribute_node {
 	 * it added from the document source. */
 	unsigned int specified:1;
 
-	/* Was the node->string allocated */
-	unsigned int allocated:1;
-
 	/* Has the node->string been converted to internal charset. */
 	unsigned int converted:1;
 
@@ -140,9 +137,6 @@ struct dom_text_node {
 	 * In order to quickly identify such nodes this member is used. */
 	unsigned int only_space:1;
 
-	/* Was the node->string allocated */
-	unsigned int allocated:1;
-
 	/* Has the node->string been converted to internal charset. */
 	unsigned int converted:1;
 };
@@ -197,6 +191,9 @@ struct dom_node {
 	/* The type of the node */
 	uint16_t type; /* -> enum dom_node_type */
 
+	/* Was the node string allocated? */
+	unsigned int allocated:1;
+
 	/* Can contain either stuff like element name or for attributes the
 	 * attribute name. */
 	struct dom_string string;

From b6b6d3c67e16da25565b4a3a9a76abf2fbc1bb3c Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 04:48:12 +0100
Subject: [PATCH 18/20] DOM: Allocate all node strings when doing incremental
 rendering

This changes init_dom_node_() to take an allocated argument saying whether
to allocate or not. If the value is -1, node->allocated will be set to the
value of node->parent->allocated. This way the value is inherited like we
do it in the menu code. It should be a sane default since we eventually
do not want to rely on the 'underlying' source of the document and there will
be fewer variables to pass around.
---
 src/dom/node.c        | 39 +++++++++++++++++++++------------------
 src/dom/node.h        | 40 ++++++++++++++++++++++++++++++++--------
 src/dom/sgml/parser.c | 12 +++++++-----
 3 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/src/dom/node.c b/src/dom/node.c
index 913397aee..d0ac956ae 100644
--- a/src/dom/node.c
+++ b/src/dom/node.c
@@ -314,7 +314,7 @@ get_dom_node_child(struct dom_node *parent, enum dom_node_type type,
 struct dom_node *
 init_dom_node_(unsigned char *file, int line,
 		struct dom_node *parent, enum dom_node_type type,
-		struct dom_string *string)
+		struct dom_string *string, int allocated)
 {
 #ifdef DEBUG_MEMLEAK
 	struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node));
@@ -326,7 +326,6 @@ init_dom_node_(unsigned char *file, int line,
 
 	node->type   = type;
 	node->parent = parent;
-	copy_dom_string(&node->string, string);
 
 	if (parent) {
 		struct dom_node_list **list = get_dom_node_list(parent, node);
@@ -343,6 +342,22 @@ init_dom_node_(unsigned char *file, int line,
 			done_dom_node(node);
 			return NULL;
 		}
+
+		/* Make it possible to add a node to a parent without
+		 * allocating the strings. */
+		node->allocated = allocated < 0 ? parent->allocated : !!allocated;
+
+	} else if (allocated >= 0) {
+		node->allocated = !!allocated;
+	}
+
+	if (node->allocated) {
+		if (!init_dom_string(&node->string, string->string, string->length)) {
+			done_dom_node(node);
+			return NULL;
+		}
+	} else {
+		copy_dom_string(&node->string, string);
 	}
 
 	return node;
@@ -359,10 +374,8 @@ done_dom_node_data(struct dom_node *node)
 
 	switch (node->type) {
 	case DOM_NODE_ATTRIBUTE:
-		if (node->allocated) {
-			done_dom_string(&node->string);
+		if (node->allocated)
 			done_dom_string(&data->attribute.value);
-		}
 		break;
 
 	case DOM_NODE_DOCUMENT:
@@ -382,31 +395,21 @@ done_dom_node_data(struct dom_node *node)
 
 		if (data->element.map)
 			done_dom_node_list(data->element.map);
-
-		if (node->allocated)
-			done_dom_string(&node->string);
-		break;
-
-	case DOM_NODE_TEXT:
-	case DOM_NODE_CDATA_SECTION:
-	case DOM_NODE_ENTITY_REFERENCE:
-		if (node->allocated)
-			done_dom_string(&node->string);
 		break;
 
 	case DOM_NODE_PROCESSING_INSTRUCTION:
 		if (data->proc_instruction.map)
 			done_dom_node_list(data->proc_instruction.map);
-		if (node->allocated) {
-			done_dom_string(&node->string);
+		if (node->allocated)
 			done_dom_string(&data->proc_instruction.instruction);
-		}
 		break;
 
 	default:
 		break;
 	}
 
+	if (node->allocated)
+		done_dom_string(&node->string);
 	mem_free(node);
 }
 
diff --git a/src/dom/node.h b/src/dom/node.h
index f1816dce6..32948ea15 100644
--- a/src/dom/node.h
+++ b/src/dom/node.h
@@ -256,12 +256,21 @@ get_dom_node_map_entry(struct dom_node_list *node_map,
 		       enum dom_node_type type, uint16_t subtype,
 		       struct dom_string *name);
 
+/* Removes the node and all its children and free()s itself */
+void done_dom_node(struct dom_node *node);
+
+/* The allocated argument is used as the value of node->allocated if >= 0.
+ * Use -1 to default node->allocated to the value of parent->allocated. */
 struct dom_node *
 init_dom_node_(unsigned char *file, int line,
 		struct dom_node *parent, enum dom_node_type type,
-		struct dom_string *string);
-#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string)
-#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string)
+		struct dom_string *string, int allocated);
+
+#define init_dom_node(type, string, allocated) \
+	init_dom_node_(__FILE__, __LINE__, NULL, type, string, allocated)
+
+#define add_dom_node(parent, type, string) \
+	init_dom_node_(__FILE__, __LINE__, parent, type, string, -1)
 
 #define add_dom_element(parent, string) \
 	add_dom_node(parent, DOM_NODE_ELEMENT, string)
@@ -273,7 +282,16 @@ add_dom_attribute(struct dom_node *parent, struct dom_string *name,
 	struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
 
 	if (node && value) {
-		copy_dom_string(&node->data.attribute.value, value);
+		struct dom_string *str = &node->data.attribute.value;
+
+		if (node->allocated) {
+			if (!init_dom_string(str, value->string, value->length)) {
+				done_dom_node(node);
+				return NULL;
+			}
+		} else {
+			copy_dom_string(str, value);
+		}
 	}
 
 	return node;
@@ -286,15 +304,21 @@ add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
 	struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
 
 	if (node && instruction) {
-		copy_dom_string(&node->data.proc_instruction.instruction, instruction);
+		struct dom_string *str = &node->data.proc_instruction.instruction;
+
+		if (node->allocated) {
+			if (!init_dom_string(str, instruction->string, instruction->length)) {
+				done_dom_node(node);
+				return NULL;
+			}
+		} else {
+			copy_dom_string(str, instruction);
+		}
 	}
 
 	return node;
 }
 
-/* Removes the node and all its children and free()s itself */
-void done_dom_node(struct dom_node *node);
-
 /* Compare two nodes returning non-zero if they differ. */
 int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
 
diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index bd9e6b9ee..217b21151 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -35,11 +35,13 @@
  * information like node subtypes and SGML parser state information. */
 
 static inline struct dom_node *
-add_sgml_document(struct dom_stack *stack, struct dom_string *string)
+add_sgml_document(struct sgml_parser *parser)
 {
-	struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string);
+	int allocated = parser->flags & SGML_PARSER_INCREMENTAL;
+	struct dom_node *node;
 
-	if (node && push_dom_node(stack, node) == DOM_STACK_CODE_OK)
+	node = init_dom_node(DOM_NODE_DOCUMENT, &parser->uri, allocated);
+	if (node && push_dom_node(&parser->stack, node) == DOM_STACK_CODE_OK)
 		return node;
 
 	return NULL;
@@ -432,13 +434,13 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 		parser->flags |= SGML_PARSER_COMPLETE;
 
 	if (!parser->root) {
-		parser->root = add_sgml_document(&parser->stack, &parser->uri);
+		parser->root = add_sgml_document(parser);
 		if (!parser->root)
 			return SGML_PARSER_CODE_MEM_ALLOC;
 		get_dom_stack_top(&parser->stack)->immutable = 1;
 	}
 
-	node = init_dom_node(DOM_NODE_TEXT, &source);
+	node = init_dom_node(DOM_NODE_TEXT, &source, 0);
 	if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK)
 		return SGML_PARSER_CODE_MEM_ALLOC;
 

From c6ba201e0c7c06f844f0379205869d69d243e310 Mon Sep 17 00:00:00 2001
From: Eric Wald <eswald@gmail.com>
Date: Sat, 28 Jan 2006 05:18:01 +0100
Subject: [PATCH 19/20] CONTRIB: Add ftplugin for setting Vim to the ELinks
 coding style

This will replace the .vimrc infrastructure currently used, which is
considered insecure.
---
 contrib/vim/c_elinks.vim | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 contrib/vim/c_elinks.vim

diff --git a/contrib/vim/c_elinks.vim b/contrib/vim/c_elinks.vim
new file mode 100644
index 000000000..51af01728
--- /dev/null
+++ b/contrib/vim/c_elinks.vim
@@ -0,0 +1,15 @@
+" Setting Vim to support the ELinks coding style
+"
+" To use this file, drop it in ~/.vim/ftplugin and set filetype plugin on.
+" Finally, make sure the path to the source directory contains the word
+" 'elinks', for example ~/src/elinks/.
+"
+" For .h files, link it as cpp_elinks.vim or define c_syntax_for_h in ~/.vimrc.
+" For .inc files, let g:filetype_inc = 'c' in ~/.vimrc.
+
+if expand('%:p:h') =~ '.*elinks.*'
+  setlocal shiftwidth=8
+  setlocal tabstop=8
+  setlocal softtabstop=0
+  setlocal noexpandtab
+endif

From 6a592b073c7ee33fac98e6d71f0561311ac2f0e2 Mon Sep 17 00:00:00 2001
From: Jonas Fonseca <fonseca@diku.dk>
Date: Sat, 28 Jan 2006 05:25:02 +0100
Subject: [PATCH 20/20] BUILD: Do not create .vimrc files

They are considered insecure, use the newly added ftplugin instead. This
also removes the config/vimrc master file.
---
 Makefile.lib | 9 ++-------
 config/vimrc | 8 --------
 2 files changed, 2 insertions(+), 15 deletions(-)
 delete mode 100644 config/vimrc

diff --git a/Makefile.lib b/Makefile.lib
index f0219e101..b09537604 100644
--- a/Makefile.lib
+++ b/Makefile.lib
@@ -127,12 +127,12 @@ CLEAN += $(PROG) $(OBJS)
 #############################################################################
 # The main default rules
 
-all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) .vimrc
+all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5)
 
 # Ensure that Makefiles in subdirs are created before we recursive into them
 init-recursive: init-default
 
-init-default: .vimrc
+init-default:
 	@$(foreach subdir,$(sort $(SUBDIRS)), \
 		$(MKINSTALLDIRS) $(subdir) >/dev/null; \
 		test -e "$(subdir)/Makefile" \
@@ -164,11 +164,6 @@ ifdef MAN5
 		$(call ncmd,installdata,$(file),$(DESTDIR)$(mandir)/man5);)
 endif
 
-.vimrc: $(top_srcdir)/Makefile.lib
-	@{ echo ':set runtimepath+=.'; \
-	   echo ':runtime $(top_srcdir)/config/vimrc'; \
-	} > .vimrc
-
 ##############################################################################
 # Auto-testing infrastructure
 
diff --git a/config/vimrc b/config/vimrc
deleted file mode 100644
index 59c5edc29..000000000
--- a/config/vimrc
+++ /dev/null
@@ -1,8 +0,0 @@
-" Master vimrc file for the ELinks project
-
-:set shiftwidth=8
-:set tabstop=8
-:set softtabstop=0
-:set noexpandtab
-
-au BufNewFile,BufRead *.inc setf c