Merge with git+ssh://pasky.or.cz/srv/git/elinks.git

2025-04-18 00:47:36 -04:00 · 2006-01-27 18:06:31 +01:00 · 2006-01-27 18:06:31 +01:00 · 1cd1786f9c
commit 1cd1786f9c
parent 7a6a5918fe 0f8aa77ebb
8 changed files with 244 additions and 30 deletions
--- a/Makefile.config.in
+++ b/Makefile.config.in
@ -46,6 +46,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 host = @host@
 ASCIIDOC = @ASCIIDOC@
 ASCIIDOC_FLAGS = @ASCIIDOC_FLAGS@
 AWK = @AWK@
 CATALOGS = @CATALOGS@
 CC = @CC@
--- a/configure.in
+++ b/configure.in
@ -66,6 +66,12 @@ if test "x$CONFIG_DOC" != xno; then
 		EL_CONFIG(CONFIG_ASCIIDOC, [AsciiDoc])
 		EL_CONFIG(MANUAL_ASCIIDOC, [HTML (one file)])
 		EL_CONFIG(MAN_ASCIIDOC, [HTML])
 		echo > config.asciidoc-unsafe.txt
 		if "$ASCIIDOC" --unsafe config.asciidoc-unsafe.txt >&/dev/null; then
 			ASCIIDOC_FLAGS=--unsafe
 		fi
 		rm config.asciidoc-unsafe.*
 	fi
 	AC_PATH_PROGS(XMLTO, "xmlto")
@ -87,6 +93,7 @@ if test "x$CONFIG_DOC" != xno; then
 	fi
 fi
 AC_SUBST(ASCIIDOC_FLAGS)
 AC_SUBST(CONFIG_ASCIIDOC)
 AC_SUBST(CONFIG_POD2HTML)
 AC_SUBST(CONFIG_XMLTO)
--- a/doc/Makefile
+++ b/doc/Makefile
@ -13,7 +13,7 @@ HTML_DIR = $(DESTDIR)$(docdir)/$(PACKAGE)/html
 PDF_DIR  = $(DESTDIR)$(docdir)/$(PACKAGE)/pdf
 ASCIIDOC_CONF  = $(srcdir)asciidoc.conf
-ASCIIDOC_FLAGS = -f $(ASCIIDOC_CONF) -a "builddir=$(CURDIR)/"
+ASCIIDOC_FLAGS += -f $(ASCIIDOC_CONF) -a "builddir=$(CURDIR)/"
 #############################################################################
 # Build files
--- a/doc/README
+++ b/doc/README
@ -57,7 +57,8 @@ in this directory or it's children.
   submitting patches etc., thus every aspiring developer should take the
   pains to read through it, do not forget to also look for README and similar
   text files in the subdirectories containing the relevant sources for
-   detailed notes regarding given modules/subsystems.
+   detailed notes regarding given modules/subsystems. Additionally, it is
   possible to build API docs. More about this below.
 	The Lua Scripting Book ...................... lua-scripting.txt
 	Events Reference Sheet ...................... events.txt
@ -94,7 +95,10 @@ and the following man page formats:
 - HTML (asciidoc)
 - man / groff (asciidoc + xmlto)
-Note: You do not need to build manpages. They are shipped with ELinks.
+Note: You do not need to build manpages. They are shipped with ELinks. However,
 if you want to have the manpages to match your local configuration and changes
 you can rebuild them (this is mostly an issue with elinks.conf(5) which might
 otherwise contain options that are not supported by the version you install).
 Note: You must first build the ELinks binary for "make all-docs" to work
 successfully. The binary is used for getting option documentation.
@ -103,6 +107,43 @@ The documentation can be installed with:
 	$ make install-doc
 Building API documentation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 There is some starting effort to make it possible to build HTML documentation
 of the APIs presented by the different modules and subsystems in ELinks. To
 build API documentation run:
 	$ make api
 in the doc/ directory. The API documentation can then be found in the doc/api/
 directory.
 NOTE: Currently only a few files provide API docs and there is no overall
 structure of the various APIs.
 The API toolchain uses a Perl script (doc/tools/code2doc) to extract info from
 header files and generate text files with AsciiDoc markup. The text files are
 then converted to HTML with AsciiDoc.
 To get an idea of how the code markup works take a look at src/dom/stack.h.
 It has a small tag saying that it provides API docs for the dom-stack module:
 	/* API Doc :: dom-stack */
 The API doc markup should be pretty straightforward. Here is an example of the
 basic structure:
 	/** <title>
 	 *
 	 * <content>
 	 */
 Only text in comments starting with '/**' is used. If the comment immediately
 precedes a declaration of some struct, enum, typedef, function, or macro, the
 name of the declared identifier will be used when creating the output to create
 anchors which can be referred to using ref:[].
 Contributing
 ------------
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@ -412,8 +412,6 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 	if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK)
 		return SGML_PARSER_CODE_MEM_ALLOC;
 	pop_dom_node(&parser->parsing);
 	return parser->code;
 }
@ -429,7 +427,9 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 /* State attached to each entry of the parser's "parsing" stack. It carries
  * what is needed to continue scanning when a chunk ends in the middle of a
  * token during incremental parsing. */
 struct sgml_parsing_state {
 	/* Scanner initialized by sgml_parsing_push() over the text to parse. */
 	struct dom_scanner scanner;
 	struct dom_node *node;
 	/* Copy of the trailing incomplete token. When parsing is resumed the
 	 * next chunk is appended to it and the result is rescanned. Released
 	 * with done_dom_string(). */
 	struct dom_string incomplete;
 	/* Value of parser->stack.depth recorded when this state was pushed. */
 	size_t depth;
 	/* Set when parsing stopped on an incomplete token, so that the next
 	 * push continues from this state; cleared once it has been resumed. */
 	unsigned int resume:1;
 };
 enum dom_stack_code
@ -441,13 +441,67 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 	int complete = !!(parser->flags & SGML_PARSER_COMPLETE);
 	int incremental = !!(parser->flags & SGML_PARSER_INCREMENTAL);
 	int detect_errors = !!(parser->flags & SGML_PARSER_DETECT_ERRORS);
 	struct dom_string *string = &node->string;
 	struct dom_scanner_token *token;
 	struct dom_string incomplete;
 	enum sgml_scanner_state scanner_state = SGML_STATE_TEXT;
 	parsing->depth = parser->stack.depth;
-	get_dom_stack_top(&parser->stack)->immutable = 1;
+
-	init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
+	if (stack->depth > 1) {
-			 SGML_STATE_TEXT, count_lines, complete, incremental,
+		struct sgml_parsing_state *parent = &parsing[-1];
 		if (parent->resume) {
 			assert(is_dom_string_set(&parent->incomplete));
 			if (!add_to_dom_string(&parent->incomplete,
 					       string->string, string->length)) {
 				parser->code = SGML_PARSER_CODE_MEM_ALLOC;
 				return DOM_STACK_CODE_OK;
 			}
 			string = &parent->incomplete;
 			scanner_state = parent->scanner.state;
 			/* Pop down to the parent. */
 			parsing = parent;
 			parsing->resume = 0;
 			pop_dom_node(stack);
 		}
 	}
 	init_dom_scanner(&parsing->scanner, &sgml_scanner_info, string,
 			 scanner_state, count_lines, complete, incremental,
 			 detect_errors);
-	parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
+
 	{
 		int immutable = get_dom_stack_top(&parser->stack)->immutable;
 		get_dom_stack_top(&parser->stack)->immutable = 1;
 		parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
 		get_dom_stack_top(&parser->stack)->immutable = !!immutable;
 	}
 	if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) {
 		pop_dom_node(&parser->parsing);
 		return DOM_STACK_CODE_OK;
 	}
 	token = get_dom_scanner_token(&parsing->scanner);
 	assert(token && token->type == SGML_TOKEN_INCOMPLETE);
 	string = &token->string;
 	set_dom_string(&incomplete, NULL, 0);
 	if (!init_dom_string(&incomplete, string->string, string->length)) {
 		parser->code = SGML_PARSER_CODE_MEM_ALLOC;
 		return DOM_STACK_CODE_OK;
 	}
 	done_dom_string(&parsing->incomplete);
 	set_dom_string(&parsing->incomplete, incomplete.string, incomplete.length);
 	parsing->resume = 1;
 	return DOM_STACK_CODE_OK;
 }
@ -458,14 +512,20 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
 	struct sgml_parser *parser = get_sgml_parser(stack);
 	struct sgml_parsing_state *parsing = data;
-	/* Pop the stack back to the state it was in. This includes cleaning
+	/* Only clean up the stack if complete so that we get proper nesting. */
-	 * away even immutable states left on the stack. */
+	if (parser->flags & SGML_PARSER_COMPLETE) {
-	while (parsing->depth < parser->stack.depth) {
+		/* Pop the stack back to the state it was in. This includes cleaning
-		get_dom_stack_top(&parser->stack)->immutable = 0;
+		 * away even immutable states left on the stack. */
-		pop_dom_node(&parser->stack);
+		while (parsing->depth < parser->stack.depth) {
 			get_dom_stack_top(&parser->stack)->immutable = 0;
 			pop_dom_node(&parser->stack);
 		}
 		/* It's bigger than when calling done_sgml_parser() in the middle of an
 		 * incomplete parsing. */
 		assert(parsing->depth == parser->stack.depth);
 	}
-	assert(parsing->depth == parser->stack.depth);
+	done_dom_string(&parsing->incomplete);
 	return DOM_STACK_CODE_OK;
 }
@ -611,8 +671,10 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
 /* Tear down a parser: pop whatever is still left on the parsing stack,
  * release both DOM stacks and the URI string, then free the parser itself.
  * NOTE(review): the explicit pop loop presumably exists so that every
  * remaining parsing state gets its pop handler run -- confirm. */
 void
 done_sgml_parser(struct sgml_parser *parser)
 {
-	done_dom_stack(&parser->stack);
+	while (!dom_stack_is_empty(&parser->parsing))
 		pop_dom_node(&parser->parsing);
 	done_dom_stack(&parser->parsing);
 	done_dom_stack(&parser->stack);
 	done_dom_string(&parser->uri);
 	mem_free(parser);
 }
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@ -439,6 +439,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 	enum sgml_token_type type = SGML_TOKEN_GARBAGE;
 	int real_length = -1;
 	int possibly_incomplete = 1;
 	enum sgml_scanner_state scanner_state = scanner->state;
 	token->string.string = string++;
@ -451,7 +452,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			string--;
 			real_length = 0;
 			type = SGML_TOKEN_TAG_END;
-			scanner->state = SGML_STATE_TEXT;
+			scanner_state = SGML_STATE_TEXT;
 			/* We are creating a 'virtual' that has no source. */
 			possibly_incomplete = 0;
@ -476,8 +477,8 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 					/* We found the end. */
 					possibly_incomplete = 0;
 				}
 				scanner->state = SGML_STATE_ELEMENT;
 				type = SGML_TOKEN_ELEMENT_BEGIN;
 				scanner_state = SGML_STATE_ELEMENT;
 			}
 		} else if (*string == '!') {
@ -527,7 +528,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			type = map_dom_scanner_string(scanner, pos, string, base);
-			scanner->state = SGML_STATE_PROC_INST;
+			scanner_state = SGML_STATE_PROC_INST;
 			real_length = string - token->string.string;
 			skip_sgml_space(scanner, &string);
@ -563,8 +564,9 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 				possibly_incomplete = 0;
 			}
-			if (type != SGML_TOKEN_GARBAGE)
+			if (type != SGML_TOKEN_GARBAGE) {
-				scanner->state = SGML_STATE_TEXT;
+				scanner_state = SGML_STATE_TEXT;
 			}
 		} else {
 			/* Alien < > stuff so ignore it */
@ -594,7 +596,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 		type = SGML_TOKEN_TAG_END;
 		assert(scanner->state == SGML_STATE_ELEMENT);
-		scanner->state = SGML_STATE_TEXT;
+		scanner_state = SGML_STATE_TEXT;
 	} else if (first_char == '/') {
 		/* We allow '/' inside elements and only consider it as an end
@ -611,7 +613,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			real_length = 0;
 			type = SGML_TOKEN_ELEMENT_EMPTY_END;
 			assert(scanner->state == SGML_STATE_ELEMENT);
-			scanner->state = SGML_STATE_TEXT;
+			scanner_state = SGML_STATE_TEXT;
 			/* We found the end. */
 			possibly_incomplete = 0;
@ -678,6 +680,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 		}
 	}
 	/* Only apply the state change if the token was not abandoned because
 	 * it was incomplete. */
 	scanner->state = scanner_state;
 	token->type = type;
 	token->string.length = real_length >= 0 ? real_length : string - token->string.string;
 	token->precedence = get_sgml_precedence(type);
--- a/src/dom/test/sgml-parser.c
+++ b/src/dom/test/sgml-parser.c
@ -266,6 +266,7 @@ main(int argc, char *argv[])
 	enum dom_config_flag normalize_flags = 0;
 	int normalize = 0;
 	int complete = 1;
 	size_t read_stdin = 0;
 	struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
 	struct dom_string source = INIT_DOM_STRING("(no source)", -1);
 	int i;
@ -302,6 +303,20 @@ main(int argc, char *argv[])
 				set_dom_string(&source, argv[i], strlen(argv[i]));
 			}
 		} else if (!strncmp(arg, "stdin", 5)) {
 			arg += 5;
 			if (*arg == '=') {
 				arg++;
 				read_stdin = atoi(arg);
 				set_dom_string(&source, arg, strlen(arg));
 			} else {
 				i++;
 				if (i >= argc)
 					die("--stdin expects a number");
 				read_stdin = atoi(argv[i]);
 			}
 			flags |= SGML_PARSER_INCREMENTAL;
 		} else if (!strncmp(arg, "normalize", 9)) {
 			arg += 9;
 			if (*arg == '=') {
@ -343,16 +358,49 @@ main(int argc, char *argv[])
 	else
 		add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
-	code = parse_sgml(parser, source.string, source.length, complete);
+	if (read_stdin > 0) {
 		unsigned char *buffer;
 		buffer = mem_alloc(read_stdin);
 		if (!buffer)
 			die("Cannot allocate buffer");
 		complete = 0;
 		while (!complete) {
 			size_t size = fread(buffer, 1, read_stdin, stdin);
 			if (ferror(stdin))
 				die("error reading from stdin");
 			complete = feof(stdin);
 			code = parse_sgml(parser, buffer, size, complete);
 			switch (code) {
 			case SGML_PARSER_CODE_OK:
 				break;
 			case SGML_PARSER_CODE_INCOMPLETE:
 				if (!complete) break;
 				/* Error */
 			default:
 				complete = 1;
 			}
 		}
 		mem_free(buffer);
 	} else {
 		code = parse_sgml(parser, source.string, source.length, complete);
 	}
 	if (parser->root) {
-		size_t root_offset = parser->stack.depth - 1;
+		assert(!complete || parser->stack.depth > 0);
-		assert(!complete || root_offset == 0);
+		while (!dom_stack_is_empty(&parser->stack)) {
-
+			get_dom_stack_top(&parser->stack)->immutable = 0;
 		get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
 		while (!dom_stack_is_empty(&parser->stack))
 			pop_dom_node(&parser->stack);
 		}
 		if (normalize) {
 			struct dom_stack stack;
--- a/src/dom/test/test-sgml-parser-incremental
+++ b/src/dom/test/test-sgml-parser-incremental
@ -0,0 +1,49 @@
 #!/bin/sh
 #
 # Copyright (c) 2005 Jonas Fonseca
 #
 test_description='Test incremental parsing of SGML documents.
 This test checks if the SGML parser correctly recovers during incremental
 parsing.
 '
 . "$TEST_LIB"
 # test_output_equals <desc> <size> <src> <out>
 #
 # Feed <src> to sgml-parser on stdin, passing <size> as the --stdin argument,
 # and check that the produced dump matches <out>.
 #   desc - test description; also used to derive the document URI
 #   size - value handed to sgml-parser's --stdin option
 #   src  - document source to parse
 #   out  - expected dump; its first line is skipped when building 'expected'
 test_output_equals () {
 	desc="$1"; shift
 	size="$1"; shift
 	src="$1"; shift
 	out="$1"; shift
 	# Build a URI from the description: drop a leading "[...]" tag, turn
 	# runs of punctuation/whitespace and underscores into '-', lowercase
 	# everything and strip any remaining non-alphanumeric characters.
 	URI="test:$(echo "$desc" | sed '
 		s/^[ \t]*\[[^]]*\][ \t]*//;
 		s/[:., \t][:., \t]*/-/g;
 		s/_/-/g;
 		# *cough*
 		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
 		s/[^a-zA-Z0-9-]//g;')"
 	# Run the parser, remove the two leading spaces of each output line
 	# and delete the last line before saving the result.
 	echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
 	| sed -e 's/^  //' | sed -n '$d;p' > output
 	# The expected dump starts with the document line followed by <out>
 	# from its second line onwards.
 	echo "#document: $URI" > expected
 	echo "$out" | sed -n '2,$p' >> expected
 	test_expect_success "$desc" 'cmp output expected' 
 }
 for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
 	test_output_equals \
 	"Incrementally parse a small document reading $i bytes at a time." \
 	"$i" \
 	'<html><body><p>Hello World!</p></body></html>' \
 	'
 element: html
  element: body
    element: p
      #text: Hello World!'
 done
 test_done