Merge with git+ssh://pasky.or.cz/srv/git/elinks.git

2025-06-30 22:19:29 -04:00 · 2006-01-28 12:11:29 +01:00 · 2006-01-28 12:11:29 +01:00 · dacb694e33
commit dacb694e33
parent 997f61bb32 6a592b073c
19 changed files with 597 additions and 124 deletions
--- a/Makefile.lib
+++ b/Makefile.lib
@ -127,12 +127,12 @@ CLEAN += $(PROG) $(OBJS)
 #############################################################################
 # The main default rules

-all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) .vimrc
+all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5)

 # Ensure that Makefiles in subdirs are created before we recurse into them
 init-recursive: init-default

-init-default: .vimrc
+init-default:
 	@$(foreach subdir,$(sort $(SUBDIRS)), \
 		$(MKINSTALLDIRS) $(subdir) >/dev/null; \
 		test -e "$(subdir)/Makefile" \
@ -164,11 +164,6 @@ ifdef MAN5
 		$(call ncmd,installdata,$(file),$(DESTDIR)$(mandir)/man5);)
 endif

-.vimrc: $(top_srcdir)/Makefile.lib
-	@{ echo ':set runtimepath+=.'; \
-	   echo ':runtime $(top_srcdir)/config/vimrc'; \
-	} > .vimrc
-
 ##############################################################################
 # Auto-testing infrastructure

--- a/config/vimrc
+++ b/config/vimrc
@ -1,8 +0,0 @@
-" Master vimrc file for the ELinks project
-
-:set shiftwidth=8
-:set tabstop=8
-:set softtabstop=0
-:set noexpandtab
-
-au BufNewFile,BufRead *.inc setf c
--- a/contrib/smjs/google_video.js
+++ b/contrib/smjs/google_video.js
@ -1,7 +1,7 @@
 /* Play videos at video.google.com with minimal niggling. Just follow the link
 * from the front page or the search page, and the video will automatically
 * be loaded. */
-function load_google_video(cached) {
+function load_google_video(cached, vs) {
 	if (!cached.uri.match(/^http:\/\/video.google.com\/videoplay/))
 		return true;

--- a/contrib/smjs/hooks.js
+++ b/contrib/smjs/hooks.js
@ -8,9 +8,9 @@ elinks.keymaps.main["@"] = function () {
 };

 elinks.preformat_html_hooks = new Array();
-elinks.preformat_html = function (cached) {
+elinks.preformat_html = function (cached, vs) {
 	for (var i in elinks.preformat_html_hooks)
-		if (!elinks.preformat_html_hooks[i](cached))
+		if (!elinks.preformat_html_hooks[i](cached, vs))
 			return false;

 	return true;
@ -36,13 +36,13 @@ elinks.follow_url_hook = function (url) {
 	return url;
 };

-function root_w00t(cached) {
+function root_w00t(cached, vs) {
 	cached.content = cached.content.replace(/root/g, "w00t");
 	return true;
 };
 elinks.preformat_html_hooks.push(root_w00t);

-function mangle_deb_bugnumbers(cached) {
+function mangle_deb_bugnumbers(cached, vs) {
 	if (!cached.uri.match(/^[a-z0-9]+:\/\/[a-z0-9A-Z.-]+debian\.org/)
 	    && !cached.uri.match(/changelog\.Debian/))
 		return true;
@ -55,7 +55,14 @@ function mangle_deb_bugnumbers(cached) {
 	/* Debian Policy Manual 4.4 footnote 16 */
 	var closes_re = /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/gi;

-	cached.content = cached.content.replace(closes_re, rewrite_closes_fn);
+	var new_content = cached.content.replace(closes_re, rewrite_closes_fn);
+	if (cached.content_type == 'text/plain') {
+		cached.content = '<pre>' + new_content + '</pre>';
+		vs.plain = "0";
+	} else {
+		cached.content = new_content;
+	}
+

 	return true;
 }
--- a/contrib/vim/c_elinks.vim
+++ b/contrib/vim/c_elinks.vim
@ -0,0 +1,15 @@
+" Setting Vim to support the ELinks coding style
+"
+" To use this file, drop it in ~/.vim/ftplugin and set filetype plugin on.
+" Finally, make sure the path to the source directory contains the word
+" 'elinks', for example ~/src/elinks/.
+"
+" For .h files, link it as cpp_elinks.vim or define c_syntax_for_h in ~/.vimrc.
+" For .inc files, let g:filetype_inc = 'c' in ~/.vimrc.
+
+if expand('%:p:h') =~ '.*elinks.*'
+  setlocal shiftwidth=8
+  setlocal tabstop=8
+  setlocal softtabstop=0
+  setlocal noexpandtab
+endif
--- a/src/document/dom/renderer.c
+++ b/src/document/dom/renderer.c
@ -1024,7 +1024,7 @@ render_dom_document(struct cache_entry *cached, struct document *document,
 	} else if (renderer.doctype == SGML_DOCTYPE_RSS) {
 		add_dom_stack_context(&parser->stack, &renderer,
 				      &dom_rss_renderer_context_info);
-		add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); 
+		add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS);
 	}

 	/* FIXME: When rendering this way we don't really care about the code.
--- a/src/dom/configuration.c
+++ b/src/dom/configuration.c
@ -44,11 +44,11 @@ normalize_text_node_whitespace(struct dom_node *node)
 		}
 	}

-	if (node->data.text.allocated)
+	if (node->allocated)
 		done_dom_string(&node->string);

 	set_dom_string(&node->string, string.string, string.length);
-	node->data.text.allocated = 1;
+	node->allocated = 1;

 	return DOM_STACK_CODE_OK;

@ -74,14 +74,14 @@ append_node_text(struct dom_config *config, struct dom_node *node)
 		set_dom_string(&dest, NULL, 0);

 	} else {
-		if (prev->data.text.allocated) {
+		if (prev->allocated) {
 			copy_struct(&dest, &prev->string);
 		} else {
 			set_dom_string(&dest, NULL, 0);
 			if (!add_to_dom_string(&dest, prev->string.string, prev->string.length))
 				return DOM_STACK_CODE_ERROR_MEM_ALLOC;
 			set_dom_string(&prev->string, dest.string, dest.length);
-			prev->data.text.allocated = 1;
+			prev->allocated = 1;
 		}
 	}

@ -135,7 +135,7 @@ append_node_text(struct dom_config *config, struct dom_node *node)

 		node->type = DOM_NODE_TEXT;
 		memset(&node->data, 0, sizeof(node->data));
-		node->data.text.allocated = 1;
+		node->allocated = 1;
 		copy_struct(&node->string, &dest);

 		if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
--- a/src/dom/configuration.h
+++ b/src/dom/configuration.h
@ -7,7 +7,7 @@ struct dom_stack;
 /* API Doc :: dom-config */

 /** DOM Configuration
- * 
+ *
 * The DOMConfiguration interface represents the configuration of a document.
 * Using the configuration, it is possible to change the behaviour of how
 * document normalization is done, such as replacing the CDATASection nodes
@ -33,7 +33,7 @@ enum dom_config_flag {
 	DOM_CONFIG_COMMENTS = 2,

 	/** "element-content-whitespace"
-	 * 
+	 *
 	 * The default is true and will keep all whitespaces in the document.
 	 * When false, discard all Text nodes that contain only whitespaces. */
 	DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE = 4,
--- a/src/dom/node.c
+++ b/src/dom/node.c
@ -202,7 +202,7 @@ struct dom_node *
 get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type,
 		       uint16_t subtype, struct dom_string *name)
 {
-	struct dom_node node = { type, INIT_DOM_STRING(name->string, name->length) };
+	struct dom_node node = { type, 0, INIT_DOM_STRING(name->string, name->length) };
 	struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, list);

 	if (subtype) {
@ -314,7 +314,7 @@ get_dom_node_child(struct dom_node *parent, enum dom_node_type type,
 struct dom_node *
 init_dom_node_(unsigned char *file, int line,
 		struct dom_node *parent, enum dom_node_type type,
-		struct dom_string *string)
+		struct dom_string *string, int allocated)
 {
 #ifdef DEBUG_MEMLEAK
 	struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node));
@ -326,7 +326,6 @@ init_dom_node_(unsigned char *file, int line,

 	node->type   = type;
 	node->parent = parent;
-	copy_dom_string(&node->string, string);

 	if (parent) {
 		struct dom_node_list **list = get_dom_node_list(parent, node);
@ -343,6 +342,22 @@ init_dom_node_(unsigned char *file, int line,
 			done_dom_node(node);
 			return NULL;
 		}
+
+		/* Make it possible to add a node to a parent without
+		 * allocating the strings. */
+		node->allocated = allocated < 0 ? parent->allocated : !!allocated;
+
+	} else if (allocated >= 0) {
+			node->allocated = !!allocated;
+	}
+
+	if (node->allocated) {
+		if (!init_dom_string(&node->string, string->string, string->length)) {
+			done_dom_node(node);
+			return NULL;
+		}
+	} else {
+		copy_dom_string(&node->string, string);
 	}

 	return node;
@ -359,8 +374,8 @@ done_dom_node_data(struct dom_node *node)

 	switch (node->type) {
 	case DOM_NODE_ATTRIBUTE:
-		if (data->attribute.allocated)
-			done_dom_string(&node->string);
+		if (node->allocated)
+			done_dom_string(&data->attribute.value);
 		break;

 	case DOM_NODE_DOCUMENT:
@ -382,20 +397,19 @@ done_dom_node_data(struct dom_node *node)
 			done_dom_node_list(data->element.map);
 		break;

-	case DOM_NODE_TEXT:
-		if (data->text.allocated)
-			done_dom_string(&node->string);
-		break;
-
 	case DOM_NODE_PROCESSING_INSTRUCTION:
 		if (data->proc_instruction.map)
 			done_dom_node_list(data->proc_instruction.map);
+		if (node->allocated)
+			done_dom_string(&data->proc_instruction.instruction);
 		break;

 	default:
 		break;
 	}

+	if (node->allocated)
+		done_dom_string(&node->string);
 	mem_free(node);
 }

--- a/src/dom/node.h
+++ b/src/dom/node.h
@ -115,9 +115,6 @@ struct dom_attribute_node {
 	 * it added from the document source. */
 	unsigned int specified:1;

-	/* Was the node->string allocated */
-	unsigned int allocated:1;
-
 	/* Has the node->string been converted to internal charset. */
 	unsigned int converted:1;

@ -140,9 +137,6 @@ struct dom_text_node {
 	 * In order to quickly identify such nodes this member is used. */
 	unsigned int only_space:1;

-	/* Was the node->string allocated */
-	unsigned int allocated:1;
-
 	/* Has the node->string been converted to internal charset. */
 	unsigned int converted:1;
 };
@ -151,9 +145,8 @@ enum dom_proc_instruction_type {
 	DOM_PROC_INSTRUCTION,

 	/* Keep this group sorted */
-	DOM_PROC_INSTRUCTION_DBHTML,	/* DocBook toolchain instruction */
-	DOM_PROC_INSTRUCTION_ELINKS,	/* Internal instruction hook */
-	DOM_PROC_INSTRUCTION_XML,	/* XML instructions */
+	DOM_PROC_INSTRUCTION_XML,		/* XML header */
+	DOM_PROC_INSTRUCTION_XML_STYLESHEET,	/* XML stylesheet link */

 	DOM_PROC_INSTRUCTION_TYPES
 };
@ -198,6 +191,9 @@ struct dom_node {
 	/* The type of the node */
 	uint16_t type; /* -> enum dom_node_type */

+	/* Was the node string allocated? */
+	unsigned int allocated:1;
+
 	/* Can contain either stuff like element name or for attributes the
 	 * attribute name. */
 	struct dom_string string;
@ -260,12 +256,21 @@ get_dom_node_map_entry(struct dom_node_list *node_map,
 		       enum dom_node_type type, uint16_t subtype,
 		       struct dom_string *name);

+/* Removes the node and all its children and free()s itself */
+void done_dom_node(struct dom_node *node);
+
+/* The allocated argument is used as the value of node->allocated if >= 0.
+ * Use -1 to default node->allocated to the value of parent->allocated. */
 struct dom_node *
 init_dom_node_(unsigned char *file, int line,
 		struct dom_node *parent, enum dom_node_type type,
-		struct dom_string *string);
-#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string)
-#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string)
+		struct dom_string *string, int allocated);
+
+#define init_dom_node(type, string, allocated) \
+	init_dom_node_(__FILE__, __LINE__, NULL, type, string, allocated)
+
+#define add_dom_node(parent, type, string) \
+	init_dom_node_(__FILE__, __LINE__, parent, type, string, -1)

 #define add_dom_element(parent, string) \
 	add_dom_node(parent, DOM_NODE_ELEMENT, string)
@ -277,7 +282,16 @@ add_dom_attribute(struct dom_node *parent, struct dom_string *name,
 	struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);

 	if (node && value) {
-		copy_dom_string(&node->data.attribute.value, value);
+		struct dom_string *str = &node->data.attribute.value;
+
+		if (node->allocated) {
+			if (!init_dom_string(str, value->string, value->length)) {
+				done_dom_node(node);
+				return NULL;
+			}
+		} else {
+			copy_dom_string(str, value);
+		}
 	}

 	return node;
@ -290,15 +304,21 @@ add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
 	struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);

 	if (node && instruction) {
-		copy_dom_string(&node->data.proc_instruction.instruction, instruction);
+		struct dom_string *str = &node->data.proc_instruction.instruction;
+
+		if (node->allocated) {
+			if (!init_dom_string(str, instruction->string, instruction->length)) {
+				done_dom_node(node);
+				return NULL;
+			}
+		} else {
+			copy_dom_string(str, instruction);
+		}
 	}

 	return node;
 }

-/* Removes the node and all its children and free()s itself */
-void done_dom_node(struct dom_node *node);
-
 /* Compare two nodes returning non-zero if they differ. */
 int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);

--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@ -35,11 +35,13 @@
 * information like node subtypes and SGML parser state information. */

 static inline struct dom_node *
-add_sgml_document(struct dom_stack *stack, struct dom_string *string)
+add_sgml_document(struct sgml_parser *parser)
 {
-	struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string);
+	int allocated = parser->flags & SGML_PARSER_INCREMENTAL;
+	struct dom_node *node;

-	if (node && push_dom_node(stack, node) == DOM_STACK_CODE_OK)
+	node = init_dom_node(DOM_NODE_DOCUMENT, &parser->uri, allocated);
+	if (node && push_dom_node(&parser->stack, node) == DOM_STACK_CODE_OK)
 		return node;

 	return NULL;
@ -74,7 +76,7 @@ add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
 }


-static inline void
+static inline struct dom_node *
 add_sgml_attribute(struct dom_stack *stack,
 		   struct dom_scanner_token *token, struct dom_scanner_token *valtoken)
 {
@ -96,9 +98,11 @@ add_sgml_attribute(struct dom_stack *stack,
 		node->data.attribute.quoted = 1;

 	if (!node || push_dom_node(stack, node) != DOM_STACK_CODE_OK)
-		return;
+		return NULL;

 	pop_dom_node(stack);
+
+	return node;
 }

 static inline struct dom_node *
@ -117,6 +121,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar
 		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML;
 		break;

+	case SGML_TOKEN_PROCESS_XML_STYLESHEET:
+		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML_STYLESHEET;
+		break;
+
 	case SGML_TOKEN_PROCESS:
 	default:
 		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION;
@ -128,19 +136,21 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar
 	return NULL;
 }

-static inline void
+static inline struct dom_node *
 add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token)
 {
 	struct dom_node *parent = get_dom_stack_top(stack)->node;
 	struct dom_node *node = add_dom_node(parent, type, &token->string);

-	if (!node) return;
+	if (!node) return NULL;

 	if (token->type == SGML_TOKEN_SPACE)
 		node->data.text.only_space = 1;

 	if (push_dom_node(stack, node) == DOM_STACK_CODE_OK)
 		pop_dom_node(stack);
+
+	return node;
 }


@ -157,18 +167,42 @@ call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *toke
 	return parser->error_func(parser, &token->string, line);
 }

+/* Appends to or 'creates' an incomplete token. This can be used to
+ * force tokens back into the 'stream' if they require that later tokens
+ * are available.
+ *
+ * scanner: the scanner whose token table may be rewritten.
+ * start:   the token at which the incomplete region should begin.
+ * token:   the token that followed start, or NULL if there was none.
+ *
+ * If token is already of type SGML_TOKEN_INCOMPLETE it is extended
+ * backwards so that its string begins where the string of start begins.
+ * If there is no token, but the scanner has both check_complete and
+ * incomplete set, the first slot of the scanner table is made the current
+ * and only token: an incomplete token spanning from the string of start
+ * to scanner->end.
+ *
+ * Returns 1 if an incomplete token is pending after the call and 0
+ * otherwise. Although the declared return type is enum sgml_parser_code,
+ * the callers visible here only test the result as a truth value.
+ *
+ * NOTE: You can only do this for tokens that are not stripped of markup such
+ * as identifiers. */
+static enum sgml_parser_code
+check_sgml_incomplete(struct dom_scanner *scanner,
+		      struct dom_scanner_token *start,
+		      struct dom_scanner_token *token)
+{
+	if (token && token->type == SGML_TOKEN_INCOMPLETE) {
+		/* Grow the incomplete token backwards so it begins at start. */
+		token->string.length += token->string.string - start->string.string;
+		token->string.string = start->string.string;
+		return 1;
+
+	} else if (!token && scanner->check_complete && scanner->incomplete) {
+		/* Number of bytes from the string of start to scanner->end. */
+		size_t left = scanner->end - start->string.string;
+
+		assert(left > 0);
+
+		/* Reuse the first table slot as the single, current token. */
+		token = scanner->current = scanner->table;
+		scanner->tokens = 1;
+		token->type = SGML_TOKEN_INCOMPLETE;
+		set_dom_string(&token->string, start->string.string, left);
+		return 1;
+	}
+
+	/* Nothing was incomplete; the caller can carry on with token. */
+	return 0;
+}
+
 static inline enum sgml_parser_code
 parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 {
 	struct dom_scanner_token name;

-	assert(dom_scanner_has_tokens(scanner)
-	       && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
-	           || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION)));
-
-	if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN)
-		skip_dom_scanner_token(scanner);
-
 	while (dom_scanner_has_tokens(scanner)) {
 		struct dom_scanner_token *token = get_dom_scanner_token(scanner);

@ -194,7 +228,7 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 				/* If the token is not a valid value token
 				 * ignore it. */
 				token = get_next_dom_scanner_token(scanner);
-				if (token && token->type == SGML_TOKEN_INCOMPLETE)
+				if (check_sgml_incomplete(scanner, &name, token))
 					return SGML_PARSER_CODE_INCOMPLETE;

 				if (token
@ -203,14 +237,15 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
 				    && token->type != SGML_TOKEN_STRING)
 					token = NULL;

-			} else if (token && token->type == SGML_TOKEN_INCOMPLETE) {
+			} else if (check_sgml_incomplete(scanner, &name, token)) {
 				return SGML_PARSER_CODE_INCOMPLETE;

 			} else {
 				token = NULL;
 			}

-			add_sgml_attribute(stack, &name, token);
+			if (!add_sgml_attribute(stack, &name, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;

 			/* Skip the value token */
 			if (token)
@ -250,19 +285,14 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 		switch (token->type) {
 		case SGML_TOKEN_ELEMENT:
 		case SGML_TOKEN_ELEMENT_BEGIN:
-			if (!add_sgml_element(stack, token)) {
-				if (token->type == SGML_TOKEN_ELEMENT) {
-					skip_dom_scanner_token(scanner);
-					break;
-				}
-
-				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
-				break;
-			}
+			if (!add_sgml_element(stack, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;

 			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
 				enum sgml_parser_code code;

+				skip_dom_scanner_token(scanner);
+
 				code = parse_sgml_attributes(stack, scanner);
 				if (code != SGML_PARSER_CODE_OK)
 					return code;
@ -301,7 +331,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			break;

 		case SGML_TOKEN_NOTATION_COMMENT:
-			add_sgml_node(stack, DOM_NODE_COMMENT, token);
+			if (!add_sgml_node(stack, DOM_NODE_COMMENT, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 			skip_dom_scanner_token(scanner);
 			break;

@ -314,7 +345,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			break;

 		case SGML_TOKEN_CDATA_SECTION:
-			add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
+			if (!add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
 			skip_dom_scanner_token(scanner);
 			break;

@ -334,10 +366,10 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			assert(token->type == SGML_TOKEN_PROCESS_DATA);
 			/* Fall-through */

-		case SGML_TOKEN_PROCESS_DATA:
-			if (add_sgml_proc_instruction(stack, &target, token)
-			    && (target.type == SGML_TOKEN_PROCESS_XML
-			        || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
+			if (!add_sgml_proc_instruction(stack, &target, token))
+				return SGML_PARSER_CODE_MEM_ALLOC;
+			if ((target.type == SGML_TOKEN_PROCESS_XML
+			     || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
 			    && token->string.length > 0) {
 				/* Parse the <?xml data="attributes"?>. */
 				struct dom_scanner attr_scanner;
@ -402,13 +434,13 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 		parser->flags |= SGML_PARSER_COMPLETE;

 	if (!parser->root) {
-		parser->root = add_sgml_document(&parser->stack, &parser->uri);
+		parser->root = add_sgml_document(parser);
 		if (!parser->root)
 			return SGML_PARSER_CODE_MEM_ALLOC;
 		get_dom_stack_top(&parser->stack)->immutable = 1;
 	}

-	node = init_dom_node(DOM_NODE_TEXT, &source);
+	node = init_dom_node(DOM_NODE_TEXT, &source, 0);
 	if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK)
 		return SGML_PARSER_CODE_MEM_ALLOC;

@ -452,15 +484,19 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 		struct sgml_parsing_state *parent = &parsing[-1];

 		if (parent->resume) {
-			assert(is_dom_string_set(&parent->incomplete));
+			if (is_dom_string_set(&parent->incomplete)) {

-			if (!add_to_dom_string(&parent->incomplete,
-					       string->string, string->length)) {
-				parser->code = SGML_PARSER_CODE_MEM_ALLOC;
-				return DOM_STACK_CODE_OK;
+				if (!add_to_dom_string(&parent->incomplete,
+						       string->string,
+						       string->length)) {
+
+					parser->code = SGML_PARSER_CODE_MEM_ALLOC;
+					return DOM_STACK_CODE_OK;
+				}
+
+				string = &parent->incomplete;
 			}

-			string = &parent->incomplete;
 			scanner_state = parent->scanner.state;

 			/* Pop down to the parent. */
@ -474,19 +510,31 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 			 scanner_state, count_lines, complete, incremental,
 			 detect_errors);

-	{
-		int immutable = get_dom_stack_top(&parser->stack)->immutable;
-
-		get_dom_stack_top(&parser->stack)->immutable = 1;
+	if (scanner_state == SGML_STATE_ELEMENT) {
+		parser->code = parse_sgml_attributes(&parser->stack, &parsing->scanner);
+		if (parser->code == SGML_PARSER_CODE_OK)
+			parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
+	} else {
 		parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
-		get_dom_stack_top(&parser->stack)->immutable = !!immutable;
 	}

-	if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) {
+	if (complete) {
 		pop_dom_node(&parser->parsing);
 		return DOM_STACK_CODE_OK;
 	}

+	if (parser->code != SGML_PARSER_CODE_INCOMPLETE) {
+		/* No need to preserve the default scanner state. */
+		if (parsing->scanner.state == SGML_STATE_TEXT) {
+			pop_dom_node(&parser->parsing);
+			return DOM_STACK_CODE_OK;
+		}
+
+		done_dom_string(&parsing->incomplete);
+		parsing->resume = 1;
+		return DOM_STACK_CODE_OK;
+	}
+
 	token = get_dom_scanner_token(&parsing->scanner);
 	assert(token && token->type == SGML_TOKEN_INCOMPLETE);

@ -522,7 +570,7 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
 		}
 		/* It's bigger than when calling done_sgml_parser() in the middle of an
 		 * incomplete parsing. */
-		assert(parsing->depth == parser->stack.depth);
+		assert(parsing->depth >= parser->stack.depth);	
 	}

 	done_dom_string(&parsing->incomplete);
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@ -449,13 +449,16 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 		if (scanner->state == SGML_STATE_ELEMENT) {
 			/* Already inside an element so insert a tag end token
 			 * and continue scanning in next iteration. */
-			string--;
-			real_length = 0;
 			type = SGML_TOKEN_TAG_END;
 			scanner_state = SGML_STATE_TEXT;

 			/* We are creating a 'virtual' token that has no source. */
 			possibly_incomplete = 0;
+			string = token->string.string;
+			real_length = 0;
+
+		} else if (string == scanner->end) {
+			/* It is incomplete. */

 		} else if (is_sgml_ident(*string)) {
 			token->string.string = string;
@ -540,6 +543,29 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 				possibly_incomplete = 0;
 			}

+			if (scanner->check_complete && scanner->incomplete) {
+				/* We need to fit both the process target token
+				 * and the process data token into the scanner
+				 * table. */
+				if (token + 1 >= scanner->table + DOM_SCANNER_TOKENS) {
+					possibly_incomplete = 1;
+
+				} else if (!possibly_incomplete) {
+					/* FIXME: We do this twice. */
+					for (pos = string + 1;
+					     (pos = skip_sgml_chars(scanner, pos, '>'));
+					     pos++) {
+						if (pos[-1] == '?')
+							break;
+					}
+					if (!pos)
+						possibly_incomplete = 1;
+				}
+
+				if (possibly_incomplete)
+					string = scanner->end;
+			}
+
 		} else if (*string == '/') {
 			string++;
 			skip_sgml_space(scanner, &string);
@ -641,6 +667,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			/* We found the end. */
 			possibly_incomplete = 0;

+		} else if (scanner->check_complete && scanner->incomplete) {
+			/* Force an incomplete token. */
+			string = scanner->end;
+
 		} else if (is_sgml_attribute(*string)) {
 			token->string.string++;
 			scan_sgml_attribute(scanner, string);
@ -698,9 +728,9 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
 {
 	unsigned char *string = scanner->position;
 	/* The length can be empty for '<??>'. */
-	size_t length = -1;
+	ssize_t length = -1;

-	token->string.string = string;
+	token->string.string = string++;

 	/* Figure out where the processing instruction ends. This doesn't use
 	 * skip_sgml() since we MUST ignore precedence here to allow '<' inside
--- a/src/dom/test/test-sgml-parser-basic
+++ b/src/dom/test/test-sgml-parser-basic
@ -56,7 +56,7 @@ element: root
    #text: a'

 test_output_equals \
-'Parse tag soup elements.' \
+'Parse tag soup elements. (I)' \
 '<parent attr="value" <child:1></><child:2</>a</parent>' \
 '
 element: parent
@ -65,6 +65,14 @@ element: parent
  element: child:2
  #text: a'

+test_output_equals \
+'Parse tag soup elements. (II)' \
+'< a >< b < c / >< / >' \
+'
+element: a
+  element: b
+    element: c'
+
 test_output_equals \
 'Parse an enclosed comment.' \
 '<root><!-- Hello World! --></root>' \
--- a/src/dom/test/test-sgml-parser-incremental
+++ b/src/dom/test/test-sgml-parser-incremental
@ -11,9 +11,8 @@ parsing.

 . "$TEST_LIB"

-test_output_equals () {
+test_incremental_parsing () {
 	desc="$1"; shift
-	size="$1"; shift
 	src="$1"; shift
 	out="$1"; shift

@ -25,25 +24,242 @@ test_output_equals () {
 		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
 		s/[^a-zA-Z0-9-]//g;')"

-	echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
-	| sed -e 's/^  //' | sed -n '$d;p' > output
 	echo "#document: $URI" > expected
 	echo "$out" | sed -n '2,$p' >> expected

-	test_expect_success "$desc" 'cmp output expected' 
+	for size in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do
+		echo -n "$src" | sgml-parser --uri "$URI" --stdin "$size" \
+		| sed -e 's/^  //' > output
+
+		test_run_ 'cmp output expected'
+		if [ "$?" != 0 -o "$eval_ret" != 0 ]
+		then
+			test_failure_ "$desc" "($size bytes)"
+			return
+		fi
+	done
+
+	test_ok_ "$desc"
 }

-for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
-	test_output_equals \
-	"Incrementally parse a small document reading $i bytes at a time." \
-	"$i" \
-	'<html><body><p>Hello World!</p></body></html>' \
+test_incremental_parsing \
+"Parse a small document." \
+'<html><body><p>Hello World!</p></body></html>' \
 	'
 element: html
  element: body
    element: p
      #text: Hello World!'

-done
+test_incremental_parsing \
+'Parse elements.' \
+'<root><child attr="value" /><child2></><child3 >a</></root>' \
+'
+element: root
+  element: child
+    attribute: attr -> value
+  element: child2
+  element: child3
+    #text: a'
+
+test_incremental_parsing \
+'Parse tag soup elements.' \
+'<parent attr="value" <child:1></><child:2</>a</parent>' \
+'
+element: parent
+  attribute: attr -> value
+  element: child:1
+  element: child:2
+  #text: a'
+
+test_incremental_parsing \
+'Parse an enclosed comment.' \
+'<root><!-- Hello World! --></root>' \
+'
+element: root
+  #comment:  Hello World! '
+
+test_incremental_parsing \
+'Parse comment combinations. (I)' \
+'<root><!-- <!-- -- > --><!--foo--><!----></root>' \
+'
+element: root
+  #comment:  <!-- -- > 
+  #comment: foo
+  #comment: '
+
+test_incremental_parsing \
+'Parse comment combinations. (II).' \
+'<! -- comment -->s<!-->-->t<!----->u' \
+'
+#comment:  comment 
+#text: s
+#comment: >
+#text: t
+#comment: -
+#text: u'
+
+test_incremental_parsing \
+'Parse bad comment. (I)' \
+'<!--->s' \
+'
+#comment: ->s'
+
+test_incremental_parsing \
+'Parse bad comment. (II)' \
+'<!--a--!>bad comment' \
+'
+#comment: a
+#text: bad comment'
+
+test_incremental_parsing \
+'Parse empty notation.' \
+'<!>s' \
+'
+#text: s'
+
+test_incremental_parsing \
+'Parse an enclosed CDATA section.' \
+'<root><![CDATA[...] ]>...]]></root>' \
+'
+element: root
+  #cdata-section: ...] ]>...'
+
+test_incremental_parsing \
+'Parse non-enclosed CDATA section.' \
+'<![CDATA[...]]>' \
+'
+#cdata-section: ...'
+
+test_incremental_parsing \
+'Parse a bad CDATA section.' \
+'<![CDATA[...' \
+'
+#cdata-section: ...'
+
+test_incremental_parsing \
+'Parse attributes.' \
+'<root lang="fr" attr name="value with &foo; <stuff"></root>' \
+'
+element: root
+  attribute: lang -> fr
+  attribute: attr -> 
+  attribute: name -> value with &foo; <stuff'
+
+test_incremental_parsing \
+'Parse attributes with garbage.' \
+"<root a=b c='d' e'f' g= h i = j k =></root>" \
+'
+element: root
+  attribute: a -> b
+  attribute: c -> d
+  attribute: g -> h
+  attribute: i -> j
+  attribute: k -> ' 
+
+test_incremental_parsing \
+'Parse attribute with non-quoted values.' \
+'<root color=#abc path=/to/%61-&\one";files/>...' \
+'
+element: root
+  attribute: color -> #abc
+  attribute: path -> /to/%61-&\one";files
+#text: ...'
+
+test_incremental_parsing \
+'Parse entity references.' \
+'&amp;-&#42;' \
+'
+entity-reference: amp
+#text: -
+entity-reference: #42'
+
+# Just how these should be gracefully handled is not clear to me.
+test_incremental_parsing \
+'Parse badly formatted entity references.' \
+'& m33p;-&.:-copy;-&;-&#;-&#xx;' \
+'
+#text: & m33p;
+#text: -
+entity-reference: .:-copy
+#text: -
+#text: &;
+#text: -
+entity-reference: #
+#text: -
+entity-reference: #xx'
+
+test_incremental_parsing \
+'Parse processing instructions.' \
+'<?xml encoding="UTF8"?>
+...
+<?ecmascript
+var val=2;
+?>' \
+'
+proc-instruction: xml -> encoding="UTF8"
+  attribute: encoding -> UTF8
+#text: \n...\n
+proc-instruction: ecmascript -> var val=2;\n'
+
+test_incremental_parsing \
+'Parse XML processing instructions.' \
+'<?xml version="1.0" />?><?xml />-' \
+'
+proc-instruction: xml -> version="1.0" />
+  attribute: version -> 1.0
+proc-instruction: xml -> />-'
+
+test_incremental_parsing \
+'Parse XML stylesheet processing instructions.' \
+'<?xml-stylesheet type="text/xsl" href="url"?>' \
+'
+proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
+  attribute: type -> text/xsl
+  attribute: href -> url'
+
+test_incremental_parsing \
+'Parse exotic processing instructions.' \
+'<?xml ?+>+?>-?>-<?js?>-<??>-' \
+'
+proc-instruction: xml -> ?+>+
+#text: -?>-
+proc-instruction: js -> 
+#text: -
+proc-instruction:  -> 
+#text: -'
+
+test_incremental_parsing \
+'Parse incorrect processing instructions.' \
+'<?js<?>-<?<??>-<?xml <=";&?>-<?' \
+'
+proc-instruction: js -> <
+#text: -
+proc-instruction:  -> <?
+#text: -
+proc-instruction: xml -> <=";&
+#text: -'
+
+test_incremental_parsing \
+'Parse incorrect processing instructions (II).' \
+'<?><?' \
+'
+proc-instruction:  -> ><?'
+
+test_incremental_parsing \
+'Skip spaces not inside text.' \
+'<
+root
+ns:attr                      
+=
+"value"
+><?	
+	target	
+ data?><	/	root	>' \
+'
+element: root
+  attribute: ns:attr -> value
+  proc-instruction: target -> data'
+

 test_done
--- a/src/protocol/fsp/fsp.c
+++ b/src/protocol/fsp/fsp.c
@ -296,7 +296,7 @@ end:
 		abort_connection(conn, S_OUT_OF_MEM);
 		return;
 	}
-	read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data); 
+	read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data);
 }

 #undef READ_SIZE
--- a/src/scripting/smjs/Makefile
+++ b/src/scripting/smjs/Makefile
@ -4,6 +4,6 @@ include $(top_builddir)/Makefile.config
 INCLUDES += $(SPIDERMONKEY_CFLAGS)

 OBJS = smjs.o core.o global_object.o hooks.o elinks_object.o cache_object.o \
-       bookmarks.o keybinding.o
+       view_state_object.o bookmarks.o keybinding.o

 include $(top_srcdir)/Makefile.lib
--- a/src/scripting/smjs/hooks.c
+++ b/src/scripting/smjs/hooks.c
@ -12,10 +12,13 @@
 #include "main/event.h"
 #include "main/module.h"
 #include "scripting/smjs/cache_object.h"
+#include "scripting/smjs/view_state_object.h"
 #include "scripting/smjs/core.h"
 #include "scripting/smjs/elinks_object.h"
 #include "scripting/smjs/hooks.h"
+#include "session/location.h"
 #include "session/session.h"
+#include "viewer/text/vs.h"


 static enum evhook_status
@ -88,8 +91,8 @@ script_hook_pre_format_html(va_list ap, void *data)
 	struct session *ses = va_arg(ap, struct session *);
 	struct cache_entry *cached = va_arg(ap, struct cache_entry *);
 	enum evhook_status ret = EVENT_HOOK_STATUS_NEXT;
-	JSObject *cache_entry_object;
-	jsval args[1], rval;
+	JSObject *cache_entry_object, *view_state_object = JSVAL_NULL;
+	jsval args[2], rval;

 	evhook_use_params(ses && cached);

@ -97,13 +100,20 @@ script_hook_pre_format_html(va_list ap, void *data)

 	smjs_ses = ses;

+	if (have_location(ses)) {
+		struct view_state *vs = &cur_loc(ses)->vs;
+
+		view_state_object = smjs_get_view_state_object(vs);
+	}
+
 	cache_entry_object = smjs_get_cache_entry_object(cached);
 	if (!cache_entry_object) goto end;

 	args[0] = OBJECT_TO_JSVAL(cache_entry_object);
+	args[1] = OBJECT_TO_JSVAL(view_state_object);

 	if (JS_TRUE == smjs_invoke_elinks_object_method("preformat_html",
-	                                                args, 1, &rval))
+	                                                args, 2, &rval))
 		if (JS_FALSE == JSVAL_TO_BOOLEAN(rval))
 			ret = EVENT_HOOK_STATUS_LAST;

--- a/src/scripting/smjs/view_state_object.c
+++ b/src/scripting/smjs/view_state_object.c
@ -0,0 +1,109 @@
+/* Exports struct view_state to the world of ECMAScript */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "elinks.h"
+
+#include "ecmascript/spidermonkey/util.h"
+#include "protocol/uri.h"
+#include "scripting/smjs/view_state_object.h"
+#include "scripting/smjs/core.h"
+#include "util/error.h"
+#include "util/memory.h"
+#include "viewer/text/vs.h"
+
+/* Tinyids identifying the properties of a view_state object. The property
+ * table below binds each one to a script-visible name, and the property
+ * getter and setter switch on them. */
+enum view_state_prop {
+	VIEW_STATE_PLAIN,	/* struct view_state.plain */
+	VIEW_STATE_URI,		/* struct view_state.uri, exposed as a string */
+};
+
+/* Properties defined on each object created by
+ * smjs_get_view_state_object(). Both are enumerable; "uri" is additionally
+ * read-only, while "plain" may be assigned. The { NULL } entry terminates
+ * the list. */
+static const JSPropertySpec view_state_props[] = {
+	{ "plain", VIEW_STATE_PLAIN, JSPROP_ENUMERATE },
+	{ "uri",   VIEW_STATE_URI,   JSPROP_ENUMERATE | JSPROP_READONLY },
+	{ NULL }
+};
+
+/* Property getter for view_state objects.
+ *
+ * @ctx is the calling context, @obj the object whose private data is the
+ * wrapped struct view_state, @id the tinyid of the property being read and
+ * @vp the slot receiving the value. *@vp is first cleared with
+ * undef_to_jsval().
+ *
+ * Returns JS_TRUE when *@vp was filled in. Returns JS_FALSE when @obj has no
+ * view_state attached, when @id is not an integer or not a known tinyid, or
+ * when the URI string could not be allocated. */
+static JSBool
+view_state_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
+{
+	struct view_state *vs = JS_GetPrivate(ctx, obj);
+
+	undef_to_jsval(ctx, vp);
+
+	/* Without a private pointer there is nothing to read from. */
+	if (!vs)
+		return JS_FALSE;
+
+	if (!JSVAL_IS_INT(id))
+		return JS_FALSE;
+
+	switch (JSVAL_TO_INT(id)) {
+	case VIEW_STATE_PLAIN:
+		*vp = INT_TO_JSVAL(vs->plain);
+
+		return JS_TRUE;
+	case VIEW_STATE_URI: {
+		/* Allocate in the context we were called with, like the rest
+		 * of this function, and check the result: wrapping a NULL
+		 * JSString would hand the script an invalid string value. */
+		JSString *uri_string = JS_NewStringCopyZ(ctx,
+		                                         struri(vs->uri));
+
+		if (!uri_string)
+			return JS_FALSE;
+
+		*vp = STRING_TO_JSVAL(uri_string);
+
+		return JS_TRUE;
+	}
+	default:
+		INTERNAL("Invalid ID %d in view_state_get_property().",
+		         JSVAL_TO_INT(id));
+	}
+
+	return JS_FALSE;
+}
+
+/* Property setter for view_state objects.
+ *
+ * @ctx is the calling context, @obj the object whose private data is the
+ * wrapped struct view_state, @id the tinyid of the property being written
+ * and @vp the value assigned by the script. Only "plain" is handled here.
+ *
+ * Returns JS_TRUE when the value was stored. Returns JS_FALSE when @obj has
+ * no view_state attached or when @id is not an integer or not a known
+ * tinyid. */
+static JSBool
+view_state_set_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
+{
+	struct view_state *vs = JS_GetPrivate(ctx, obj);
+
+	/* Without a private pointer there is nothing to write to. */
+	if (!vs)
+		return JS_FALSE;
+
+	if (!JSVAL_IS_INT(id))
+		return JS_FALSE;
+
+	switch (JSVAL_TO_INT(id)) {
+	case VIEW_STATE_PLAIN: {
+		/* Take booleans and integers at face value. Going through
+		 * the string form would turn `true' into atol("true"), which
+		 * is 0. Anything else is still parsed from its string
+		 * representation. */
+		if (JSVAL_IS_BOOLEAN(*vp))
+			vs->plain = JSVAL_TO_BOOLEAN(*vp) ? 1 : 0;
+		else if (JSVAL_IS_INT(*vp))
+			vs->plain = JSVAL_TO_INT(*vp);
+		else
+			vs->plain = atol(jsval_to_string(ctx, vp));
+
+		return JS_TRUE;
+	}
+	default:
+		INTERNAL("Invalid ID %d in view_state_set_property().",
+		         JSVAL_TO_INT(id));
+	}
+
+	return JS_FALSE;
+}
+
+/* JSClass of the objects wrapping a struct view_state. JSCLASS_HAS_PRIVATE
+ * provides the private slot that holds the struct view_state pointer.
+ * Property reads and writes go through view_state_get_property() and
+ * view_state_set_property(); every other hook is a SpiderMonkey stub, so
+ * in particular finalizing an object does not free the wrapped struct. */
+static const JSClass view_state_class = {
+	"view_state",
+	JSCLASS_HAS_PRIVATE,
+	JS_PropertyStub, JS_PropertyStub,
+	view_state_get_property, view_state_set_property,
+	JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, JS_FinalizeStub
+};
+
+/* Wrap @vs in a fresh JS object of class view_state, created in smjs_ctx.
+ * The pointer is stored as the object's private data and the properties
+ * listed in view_state_props are defined on it.
+ * Returns the new object, or NULL if creating it or either setup step
+ * fails. */
+JSObject *
+smjs_get_view_state_object(struct view_state *vs)
+{
+	JSObject *wrapper;
+
+	assert(smjs_ctx);
+
+	wrapper = JS_NewObject(smjs_ctx, (JSClass *) &view_state_class,
+	                       NULL, NULL);
+	if (wrapper == NULL)
+		return NULL;
+
+	/* Attach the struct first, then define the properties; give up at
+	 * the first step that reports failure. */
+	if (!JS_SetPrivate(smjs_ctx, wrapper, vs)
+	    || !JS_DefineProperties(smjs_ctx, wrapper,
+	                            (JSPropertySpec *) view_state_props))
+		return NULL;
+
+	return wrapper;
+}
--- a/src/scripting/smjs/view_state_object.h
+++ b/src/scripting/smjs/view_state_object.h
@ -0,0 +1,9 @@
+#ifndef EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H
+#define EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H
+
+/* Only a pointer to the struct is used here, so a forward declaration is
+ * enough. */
+struct view_state;
+
+/* Create a JS object that exposes @vs to scripts. Returns NULL if the
+ * object could not be created or set up.
+ * This header includes nothing itself, so JSObject must already be declared
+ * by whoever includes it. */
+JSObject *smjs_get_view_state_object(struct view_state *vs);
+
+#endif
+