Merge with git+ssh://pasky.or.cz/srv/git/elinks.git

2025-02-02 15:09:23 -05:00 · 2006-01-27 18:06:31 +01:00 · 2006-01-27 18:06:31 +01:00 · 1cd1786f9c
commit 1cd1786f9c
parent 7a6a5918fe 0f8aa77ebb
8 changed files with 244 additions and 30 deletions
--- a/Makefile.config.in
+++ b/Makefile.config.in
@ -46,6 +46,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 host = @host@

 ASCIIDOC = @ASCIIDOC@
+ASCIIDOC_FLAGS = @ASCIIDOC_FLAGS@
 AWK = @AWK@
 CATALOGS = @CATALOGS@
 CC = @CC@
--- a/configure.in
+++ b/configure.in
@ -66,6 +66,12 @@ if test "x$CONFIG_DOC" != xno; then
 		EL_CONFIG(CONFIG_ASCIIDOC, [AsciiDoc])
 		EL_CONFIG(MANUAL_ASCIIDOC, [HTML (one file)])
 		EL_CONFIG(MAN_ASCIIDOC, [HTML])
+
+		echo > config.asciidoc-unsafe.txt
+		if "$ASCIIDOC" --unsafe config.asciidoc-unsafe.txt >&/dev/null; then
+			ASCIIDOC_FLAGS=--unsafe
+		fi
+		rm config.asciidoc-unsafe.*
 	fi

 	AC_PATH_PROGS(XMLTO, "xmlto")
@ -87,6 +93,7 @@ if test "x$CONFIG_DOC" != xno; then
 	fi
 fi

+AC_SUBST(ASCIIDOC_FLAGS)
 AC_SUBST(CONFIG_ASCIIDOC)
 AC_SUBST(CONFIG_POD2HTML)
 AC_SUBST(CONFIG_XMLTO)
--- a/doc/Makefile
+++ b/doc/Makefile
@ -13,7 +13,7 @@ HTML_DIR = $(DESTDIR)$(docdir)/$(PACKAGE)/html
 PDF_DIR  = $(DESTDIR)$(docdir)/$(PACKAGE)/pdf

 ASCIIDOC_CONF  = $(srcdir)asciidoc.conf
-ASCIIDOC_FLAGS = -f $(ASCIIDOC_CONF) -a "builddir=$(CURDIR)/"
+ASCIIDOC_FLAGS += -f $(ASCIIDOC_CONF) -a "builddir=$(CURDIR)/"

 #############################################################################
 # Build files
--- a/doc/README
+++ b/doc/README
@ -57,7 +57,8 @@ in this directory or it's children.
   submitting patches etc., thus every aspiring developer should take the
   pains to read through it, do not forget to also look for README and similar
   text files in the subdirectories containing the relevant sources for
-   detailed notes regarding given modules/subsystems.
+   detailed notes regarding given modules/subsystems. Additionally, it is
+   possible to build API docs. More about this below.

 	The Lua Scripting Book ...................... lua-scripting.txt
 	Events Reference Sheet ...................... events.txt
@ -94,7 +95,10 @@ and the following man page formats:
 - HTML (asciidoc)
 - man / groff (asciidoc + xmlto)

-Note: You do not need to build manpages. They are shipped with ELinks.
+Note: You do not need to build manpages. They are shipped with ELinks. However,
+if you want the manpages to match your local configuration and changes, you
+can rebuild them (this is mostly an issue with elinks.conf(5), which might
+otherwise contain options that are not supported by the version you install).

 Note: You must first build the ELinks binary for "make all-docs" to work
 successfully. The binary is used for getting option documentation.
@ -103,6 +107,43 @@ The documentation can be installed with:

 	$ make install-doc

+Building API documentation
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There is some starting effort to make it possible to build HTML documentation
+of the APIs presented by the different modules and subsystems in ELinks. To
+build API documentation run:
+
+	$ make api
+
+in the doc/ directory. The API documentation can then be found in the doc/api/
+directory.
+
+NOTE: Currently only a few files provide API docs and there is no overall
+structure of the various APIs.
+
+The API toolchain uses a Perl script (doc/tools/code2doc) to extract info from
+header files and generate text files with AsciiDoc markup. The text files are
+then converted to HTML with AsciiDoc.
+
+To get an idea of how the code markup works take a look at src/dom/stack.h.
+It has a small tag saying that it provides API docs for the dom-stack module:
+
+	/* API Doc :: dom-stack */
+
+The API doc markup should be pretty straightforward. Here is an example of the
+basic structure:
+
+	/** <title>
+	 *
+	 * <content>
+	 */
+
+Only text in comments starting with '/**' is used. If the comment immediately
+precedes a declaration of some struct, enum, typedef, function, or macro, the
+name of the declared identifier is used to create anchors in the output which
+can be referred to using ref:[].
+

 Contributing
 ------------
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@ -412,8 +412,6 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 	if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK)
 		return SGML_PARSER_CODE_MEM_ALLOC;

-	pop_dom_node(&parser->parsing);
-
 	return parser->code;
 }

@ -429,7 +427,9 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize,
 struct sgml_parsing_state {
 	struct dom_scanner scanner;
 	struct dom_node *node;
+	struct dom_string incomplete;
 	size_t depth;
+	unsigned int resume:1;
 };

 enum dom_stack_code
@ -441,13 +441,67 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 	int complete = !!(parser->flags & SGML_PARSER_COMPLETE);
 	int incremental = !!(parser->flags & SGML_PARSER_INCREMENTAL);
 	int detect_errors = !!(parser->flags & SGML_PARSER_DETECT_ERRORS);
+	struct dom_string *string = &node->string;
+	struct dom_scanner_token *token;
+	struct dom_string incomplete;
+	enum sgml_scanner_state scanner_state = SGML_STATE_TEXT;

 	parsing->depth = parser->stack.depth;
-	get_dom_stack_top(&parser->stack)->immutable = 1;
-	init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
-			 SGML_STATE_TEXT, count_lines, complete, incremental,
+
+	if (stack->depth > 1) {
+		struct sgml_parsing_state *parent = &parsing[-1];
+
+		if (parent->resume) {
+			assert(is_dom_string_set(&parent->incomplete));
+
+			if (!add_to_dom_string(&parent->incomplete,
+					       string->string, string->length)) {
+				parser->code = SGML_PARSER_CODE_MEM_ALLOC;
+				return DOM_STACK_CODE_OK;
+			}
+
+			string = &parent->incomplete;
+			scanner_state = parent->scanner.state;
+
+			/* Pop down to the parent. */
+			parsing = parent;
+			parsing->resume = 0;
+			pop_dom_node(stack);
+		}
+	}
+
+	init_dom_scanner(&parsing->scanner, &sgml_scanner_info, string,
+			 scanner_state, count_lines, complete, incremental,
 			 detect_errors);
-	parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
+
+	{
+		int immutable = get_dom_stack_top(&parser->stack)->immutable;
+
+		get_dom_stack_top(&parser->stack)->immutable = 1;
+		parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner);
+		get_dom_stack_top(&parser->stack)->immutable = !!immutable;
+	}
+
+	if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) {
+		pop_dom_node(&parser->parsing);
+		return DOM_STACK_CODE_OK;
+	}
+
+	token = get_dom_scanner_token(&parsing->scanner);
+	assert(token && token->type == SGML_TOKEN_INCOMPLETE);
+
+	string = &token->string;
+
+	set_dom_string(&incomplete, NULL, 0);
+
+	if (!init_dom_string(&incomplete, string->string, string->length)) {
+		parser->code = SGML_PARSER_CODE_MEM_ALLOC;
+		return DOM_STACK_CODE_OK;
+	}
+
+	done_dom_string(&parsing->incomplete);
+	set_dom_string(&parsing->incomplete, incomplete.string, incomplete.length);
+	parsing->resume = 1;

 	return DOM_STACK_CODE_OK;
 }
@ -458,14 +512,20 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
 	struct sgml_parser *parser = get_sgml_parser(stack);
 	struct sgml_parsing_state *parsing = data;

-	/* Pop the stack back to the state it was in. This includes cleaning
-	 * away even immutable states left on the stack. */
-	while (parsing->depth < parser->stack.depth) {
-		get_dom_stack_top(&parser->stack)->immutable = 0;
-		pop_dom_node(&parser->stack);
+	/* Only clean up the stack if complete so that we get proper nesting. */
+	if (parser->flags & SGML_PARSER_COMPLETE) {
+		/* Pop the stack back to the state it was in. This includes cleaning
+		 * away even immutable states left on the stack. */
+		while (parsing->depth < parser->stack.depth) {
+			get_dom_stack_top(&parser->stack)->immutable = 0;
+			pop_dom_node(&parser->stack);
+		}
+		/* It's bigger than when calling done_sgml_parser() in the middle of an
+		 * incomplete parsing. */
+		assert(parsing->depth == parser->stack.depth);
 	}

-	assert(parsing->depth == parser->stack.depth);
+	done_dom_string(&parsing->incomplete);

 	return DOM_STACK_CODE_OK;
 }
@ -611,8 +671,10 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
 void
 done_sgml_parser(struct sgml_parser *parser)
 {
-	done_dom_stack(&parser->stack);
+	while (!dom_stack_is_empty(&parser->parsing))
+		pop_dom_node(&parser->parsing);
 	done_dom_stack(&parser->parsing);
+	done_dom_stack(&parser->stack);
 	done_dom_string(&parser->uri);
 	mem_free(parser);
 }
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@ -439,6 +439,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 	enum sgml_token_type type = SGML_TOKEN_GARBAGE;
 	int real_length = -1;
 	int possibly_incomplete = 1;
+	enum sgml_scanner_state scanner_state = scanner->state;

 	token->string.string = string++;

@ -451,7 +452,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			string--;
 			real_length = 0;
 			type = SGML_TOKEN_TAG_END;
-			scanner->state = SGML_STATE_TEXT;
+			scanner_state = SGML_STATE_TEXT;

 			/* We are creating a 'virtual' that has no source. */
 			possibly_incomplete = 0;
@ -476,8 +477,8 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 					/* We found the end. */
 					possibly_incomplete = 0;
 				}
-				scanner->state = SGML_STATE_ELEMENT;
 				type = SGML_TOKEN_ELEMENT_BEGIN;
+				scanner_state = SGML_STATE_ELEMENT;
 			}

 		} else if (*string == '!') {
@ -527,7 +528,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t

 			type = map_dom_scanner_string(scanner, pos, string, base);

-			scanner->state = SGML_STATE_PROC_INST;
+			scanner_state = SGML_STATE_PROC_INST;

 			real_length = string - token->string.string;
 			skip_sgml_space(scanner, &string);
@ -563,8 +564,9 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 				possibly_incomplete = 0;
 			}

-			if (type != SGML_TOKEN_GARBAGE)
-				scanner->state = SGML_STATE_TEXT;
+			if (type != SGML_TOKEN_GARBAGE) {
+				scanner_state = SGML_STATE_TEXT;
+			}

 		} else {
 			/* Alien < > stuff so ignore it */
@ -594,7 +596,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t

 		type = SGML_TOKEN_TAG_END;
 		assert(scanner->state == SGML_STATE_ELEMENT);
-		scanner->state = SGML_STATE_TEXT;
+		scanner_state = SGML_STATE_TEXT;

 	} else if (first_char == '/') {
 		/* We allow '/' inside elements and only consider it as an end
@ -611,7 +613,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 			real_length = 0;
 			type = SGML_TOKEN_ELEMENT_EMPTY_END;
 			assert(scanner->state == SGML_STATE_ELEMENT);
-			scanner->state = SGML_STATE_TEXT;
+			scanner_state = SGML_STATE_TEXT;

 			/* We found the end. */
 			possibly_incomplete = 0;
@ -678,6 +680,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
 		}
 	}

+	/* Only apply the state change if the token was not abandoned because
+	 * it was incomplete. */
+	scanner->state = scanner_state;
+
 	token->type = type;
 	token->string.length = real_length >= 0 ? real_length : string - token->string.string;
 	token->precedence = get_sgml_precedence(type);
--- a/src/dom/test/sgml-parser.c
+++ b/src/dom/test/sgml-parser.c
@ -266,6 +266,7 @@ main(int argc, char *argv[])
 	enum dom_config_flag normalize_flags = 0;
 	int normalize = 0;
 	int complete = 1;
+	size_t read_stdin = 0;
 	struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
 	struct dom_string source = INIT_DOM_STRING("(no source)", -1);
 	int i;
@ -302,6 +303,20 @@ main(int argc, char *argv[])
 				set_dom_string(&source, argv[i], strlen(argv[i]));
 			}

+		} else if (!strncmp(arg, "stdin", 5)) {
+			arg += 5;
+			if (*arg == '=') {
+				arg++;
+				read_stdin = atoi(arg);
+				set_dom_string(&source, arg, strlen(arg));
+			} else {
+				i++;
+				if (i >= argc)
+					die("--stdin expects a number");
+				read_stdin = atoi(argv[i]);
+			}
+			flags |= SGML_PARSER_INCREMENTAL;
+
 		} else if (!strncmp(arg, "normalize", 9)) {
 			arg += 9;
 			if (*arg == '=') {
@ -343,16 +358,49 @@ main(int argc, char *argv[])
 	else
 		add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);

-	code = parse_sgml(parser, source.string, source.length, complete);
+	if (read_stdin > 0) {
+		unsigned char *buffer;
+
+		buffer = mem_alloc(read_stdin);
+		if (!buffer)
+			die("Cannot allocate buffer");
+
+		complete = 0;
+
+		while (!complete) {
+			size_t size = fread(buffer, 1, read_stdin, stdin);
+
+			if (ferror(stdin))
+				die("error reading from stdin");
+
+			complete = feof(stdin);
+
+			code = parse_sgml(parser, buffer, size, complete);
+			switch (code) {
+			case SGML_PARSER_CODE_OK:
+				break;
+
+			case SGML_PARSER_CODE_INCOMPLETE:
+				if (!complete) break;
+				/* Error */
+			default:
+				complete = 1;
+			}
+		}
+
+		mem_free(buffer);
+
+	} else {
+		code = parse_sgml(parser, source.string, source.length, complete);
+	}
+
 	if (parser->root) {
-		size_t root_offset = parser->stack.depth - 1;
+		assert(!complete || parser->stack.depth > 0);

-		assert(!complete || root_offset == 0);
-
-		get_dom_stack_state(&parser->stack, root_offset)->immutable = 0;
-
-		while (!dom_stack_is_empty(&parser->stack))
+		while (!dom_stack_is_empty(&parser->stack)) {
+			get_dom_stack_top(&parser->stack)->immutable = 0;
 			pop_dom_node(&parser->stack);
+		}

 		if (normalize) {
 			struct dom_stack stack;
--- a/src/dom/test/test-sgml-parser-incremental
+++ b/src/dom/test/test-sgml-parser-incremental
@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Jonas Fonseca
+#
+
+test_description='Test incremental parsing of SGML documents.
+
+This test checks if the SGML parser correctly recovers during incremental
+parsing.
+'
+
+. "$TEST_LIB"
+
+test_output_equals () {
+	desc="$1"; shift
+	size="$1"; shift
+	src="$1"; shift
+	out="$1"; shift
+
+	URI="test:$(echo "$desc" | sed '
+		s/^[ \t]*\[[^]]*\][ \t]*//;
+		s/[:., \t][:., \t]*/-/g;
+		s/_/-/g;
+		# *cough*
+		y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/;
+		s/[^a-zA-Z0-9-]//g;')"
+
+	echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \
+	| sed -e 's/^  //' | sed -n '$d;p' > output
+	echo "#document: $URI" > expected
+	echo "$out" | sed -n '2,$p' >> expected
+
+	test_expect_success "$desc" 'cmp output expected' 
+}
+
+for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do
+	test_output_equals \
+	"Incrementally parse a small document reading $i bytes at a time." \
+	"$i" \
+	'<html><body><p>Hello World!</p></body></html>' \
+	'
+element: html
+  element: body
+    element: p
+      #text: Hello World!'
+
+done
+
+test_done