diff --git a/Makefile.lib b/Makefile.lib index f0219e101..b09537604 100644 --- a/Makefile.lib +++ b/Makefile.lib @@ -127,12 +127,12 @@ CLEAN += $(PROG) $(OBJS) ############################################################################# # The main default rules -all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) .vimrc +all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) # Ensure that Makefiles in subdirs are created before we recursive into them init-recursive: init-default -init-default: .vimrc +init-default: @$(foreach subdir,$(sort $(SUBDIRS)), \ $(MKINSTALLDIRS) $(subdir) >/dev/null; \ test -e "$(subdir)/Makefile" \ @@ -164,11 +164,6 @@ ifdef MAN5 $(call ncmd,installdata,$(file),$(DESTDIR)$(mandir)/man5);) endif -.vimrc: $(top_srcdir)/Makefile.lib - @{ echo ':set runtimepath+=.'; \ - echo ':runtime $(top_srcdir)/config/vimrc'; \ - } > .vimrc - ############################################################################## # Auto-testing infrastructure diff --git a/config/vimrc b/config/vimrc deleted file mode 100644 index 59c5edc29..000000000 --- a/config/vimrc +++ /dev/null @@ -1,8 +0,0 @@ -" Master vimrc file for the ELinks project - -:set shiftwidth=8 -:set tabstop=8 -:set softtabstop=0 -:set noexpandtab - -au BufNewFile,BufRead *.inc setf c diff --git a/contrib/smjs/google_video.js b/contrib/smjs/google_video.js index f267941f1..46b5ee192 100644 --- a/contrib/smjs/google_video.js +++ b/contrib/smjs/google_video.js @@ -1,7 +1,7 @@ /* Play videos at video.google.com with minimal niggling. Just follow the link * from the front page or the search page, and the video will automatically * be loaded. */ -function load_google_video(cached) { +function load_google_video(cached, vs) { if (!cached.uri.match(/^http:\/\/video.google.com\/videoplay/)) return true; diff --git a/contrib/smjs/hooks.js b/contrib/smjs/hooks.js index 875904d32..0278e46f2 100644 --- a/contrib/smjs/hooks.js +++ b/contrib/smjs/hooks.js @@ -8,9 +8,9 @@ elinks.keymaps.main["@"] = function () { }; elinks.preformat_html_hooks = new Array(); -elinks.preformat_html = function (cached) { +elinks.preformat_html = function (cached, vs) { for (var i in elinks.preformat_html_hooks) - if (!elinks.preformat_html_hooks[i](cached)) + if (!elinks.preformat_html_hooks[i](cached, vs)) return false; return true; @@ -36,13 +36,13 @@ elinks.follow_url_hook = function (url) { return url; }; -function root_w00t(cached) { +function root_w00t(cached, vs) { cached.content = cached.content.replace(/root/g, "w00t"); return true; }; elinks.preformat_html_hooks.push(root_w00t); -function mangle_deb_bugnumbers(cached) { +function mangle_deb_bugnumbers(cached, vs) { if (!cached.uri.match(/^[a-z0-9]+:\/\/[a-z0-9A-Z.-]+debian\.org/) && !cached.uri.match(/changelog\.Debian/)) return true; @@ -55,7 +55,14 @@ function mangle_deb_bugnumbers(cached) { /* Debian Policy Manual 4.4 footnote 16 */ var closes_re = /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/gi; - cached.content = cached.content.replace(closes_re, rewrite_closes_fn); + var new_content = cached.content.replace(closes_re, rewrite_closes_fn); + if (cached.content_type == 'text/plain') { + cached.content = '
' + new_content + ''; + vs.plain = "0"; + } else { + cached.content = new_content; + } + return true; } diff --git a/contrib/vim/c_elinks.vim b/contrib/vim/c_elinks.vim new file mode 100644 index 000000000..51af01728 --- /dev/null +++ b/contrib/vim/c_elinks.vim @@ -0,0 +1,15 @@ +" Setting Vim to support the ELinks coding style +" +" To use this file, drop it in ~/.vim/ftplugin and set filetype plugin on. +" Finally, make sure the path to the source directory contains the word +" 'elinks', for example ~/src/elinks/. +" +" For .h files, link it as cpp_elinks.vim or define c_syntax_for_h in ~/.vimrc. +" For .inc files, let g:filetype_inc = 'c' in ~/.vimrc. + +if expand('%:p:h') =~ '.*elinks.*' + setlocal shiftwidth=8 + setlocal tabstop=8 + setlocal softtabstop=0 + setlocal noexpandtab +endif diff --git a/src/document/dom/renderer.c b/src/document/dom/renderer.c index 131f30eb6..451e21fb0 100644 --- a/src/document/dom/renderer.c +++ b/src/document/dom/renderer.c @@ -1024,7 +1024,7 @@ render_dom_document(struct cache_entry *cached, struct document *document, } else if (renderer.doctype == SGML_DOCTYPE_RSS) { add_dom_stack_context(&parser->stack, &renderer, &dom_rss_renderer_context_info); - add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); + add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); } /* FIXME: When rendering this way we don't really care about the code. diff --git a/src/dom/configuration.c b/src/dom/configuration.c index fb8f8c921..b228f02e8 100644 --- a/src/dom/configuration.c +++ b/src/dom/configuration.c @@ -44,11 +44,11 @@ normalize_text_node_whitespace(struct dom_node *node) } } - if (node->data.text.allocated) + if (node->allocated) done_dom_string(&node->string); set_dom_string(&node->string, string.string, string.length); - node->data.text.allocated = 1; + node->allocated = 1; return DOM_STACK_CODE_OK; @@ -74,14 +74,14 @@ append_node_text(struct dom_config *config, struct dom_node *node) set_dom_string(&dest, NULL, 0); } else { - if (prev->data.text.allocated) { + if (prev->allocated) { copy_struct(&dest, &prev->string); } else { set_dom_string(&dest, NULL, 0); if (!add_to_dom_string(&dest, prev->string.string, prev->string.length)) return DOM_STACK_CODE_ERROR_MEM_ALLOC; set_dom_string(&prev->string, dest.string, dest.length); - prev->data.text.allocated = 1; + prev->allocated = 1; } } @@ -135,7 +135,7 @@ append_node_text(struct dom_config *config, struct dom_node *node) node->type = DOM_NODE_TEXT; memset(&node->data, 0, sizeof(node->data)); - node->data.text.allocated = 1; + node->allocated = 1; copy_struct(&node->string, &dest); if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE) diff --git a/src/dom/configuration.h b/src/dom/configuration.h index f323c386f..ad0bdb319 100644 --- a/src/dom/configuration.h +++ b/src/dom/configuration.h @@ -7,7 +7,7 @@ struct dom_stack; /* API Doc :: dom-config */ /** DOM Configuration - * + * * The DOMConfiguration interface represents the configuration of a document. * Using the configuration, it is possible to change the behaviour of how * document normalization is done, such as replacing the CDATASection nodes @@ -33,7 +33,7 @@ enum dom_config_flag { DOM_CONFIG_COMMENTS = 2, /** "element-content-whitespace" - * + * * The default is true and will keep all whitespaces in the document. * When false, discard all Text nodes that contain only whitespaces. */ DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE = 4, diff --git a/src/dom/node.c b/src/dom/node.c index 9995c2230..d0ac956ae 100644 --- a/src/dom/node.c +++ b/src/dom/node.c @@ -202,7 +202,7 @@ struct dom_node * get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type, uint16_t subtype, struct dom_string *name) { - struct dom_node node = { type, INIT_DOM_STRING(name->string, name->length) }; + struct dom_node node = { type, 0, INIT_DOM_STRING(name->string, name->length) }; struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, list); if (subtype) { @@ -314,7 +314,7 @@ get_dom_node_child(struct dom_node *parent, enum dom_node_type type, struct dom_node * init_dom_node_(unsigned char *file, int line, struct dom_node *parent, enum dom_node_type type, - struct dom_string *string) + struct dom_string *string, int allocated) { #ifdef DEBUG_MEMLEAK struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node)); @@ -326,7 +326,6 @@ init_dom_node_(unsigned char *file, int line, node->type = type; node->parent = parent; - copy_dom_string(&node->string, string); if (parent) { struct dom_node_list **list = get_dom_node_list(parent, node); @@ -343,6 +342,22 @@ init_dom_node_(unsigned char *file, int line, done_dom_node(node); return NULL; } + + /* Make it possible to add a node to a parent without + * allocating the strings. */ + node->allocated = allocated < 0 ? parent->allocated : !!allocated; + + } else if (allocated >= 0) { + node->allocated = !!allocated; + } + + if (node->allocated) { + if (!init_dom_string(&node->string, string->string, string->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(&node->string, string); } return node; @@ -359,8 +374,8 @@ done_dom_node_data(struct dom_node *node) switch (node->type) { case DOM_NODE_ATTRIBUTE: - if (data->attribute.allocated) - done_dom_string(&node->string); + if (node->allocated) + done_dom_string(&data->attribute.value); break; case DOM_NODE_DOCUMENT: @@ -382,20 +397,19 @@ done_dom_node_data(struct dom_node *node) done_dom_node_list(data->element.map); break; - case DOM_NODE_TEXT: - if (data->text.allocated) - done_dom_string(&node->string); - break; - case DOM_NODE_PROCESSING_INSTRUCTION: if (data->proc_instruction.map) done_dom_node_list(data->proc_instruction.map); + if (node->allocated) + done_dom_string(&data->proc_instruction.instruction); break; default: break; } + if (node->allocated) + done_dom_string(&node->string); mem_free(node); } diff --git a/src/dom/node.h b/src/dom/node.h index 9f4d2cfbd..32948ea15 100644 --- a/src/dom/node.h +++ b/src/dom/node.h @@ -115,9 +115,6 @@ struct dom_attribute_node { * it added from the document source. */ unsigned int specified:1; - /* Was the node->string allocated */ - unsigned int allocated:1; - /* Has the node->string been converted to internal charset. */ unsigned int converted:1; @@ -140,9 +137,6 @@ struct dom_text_node { * In order to quickly identify such nodes this member is used. */ unsigned int only_space:1; - /* Was the node->string allocated */ - unsigned int allocated:1; - /* Has the node->string been converted to internal charset. */ unsigned int converted:1; }; @@ -151,9 +145,8 @@ enum dom_proc_instruction_type { DOM_PROC_INSTRUCTION, /* Keep this group sorted */ - DOM_PROC_INSTRUCTION_DBHTML, /* DocBook toolchain instruction */ - DOM_PROC_INSTRUCTION_ELINKS, /* Internal instruction hook */ - DOM_PROC_INSTRUCTION_XML, /* XML instructions */ + DOM_PROC_INSTRUCTION_XML, /* XML header */ + DOM_PROC_INSTRUCTION_XML_STYLESHEET, /* XML stylesheet link */ DOM_PROC_INSTRUCTION_TYPES }; @@ -198,6 +191,9 @@ struct dom_node { /* The type of the node */ uint16_t type; /* -> enum dom_node_type */ + /* Was the node string allocated? */ + unsigned int allocated:1; + /* Can contain either stuff like element name or for attributes the * attribute name. */ struct dom_string string; @@ -260,12 +256,21 @@ get_dom_node_map_entry(struct dom_node_list *node_map, enum dom_node_type type, uint16_t subtype, struct dom_string *name); +/* Removes the node and all its children and free()s itself */ +void done_dom_node(struct dom_node *node); + +/* The allocated argument is used as the value of node->allocated if >= 0. + * Use -1 to default node->allocated to the value of parent->allocated. */ struct dom_node * init_dom_node_(unsigned char *file, int line, struct dom_node *parent, enum dom_node_type type, - struct dom_string *string); -#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string) -#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string) + struct dom_string *string, int allocated); + +#define init_dom_node(type, string, allocated) \ + init_dom_node_(__FILE__, __LINE__, NULL, type, string, allocated) + +#define add_dom_node(parent, type, string) \ + init_dom_node_(__FILE__, __LINE__, parent, type, string, -1) #define add_dom_element(parent, string) \ add_dom_node(parent, DOM_NODE_ELEMENT, string) @@ -277,7 +282,16 @@ add_dom_attribute(struct dom_node *parent, struct dom_string *name, struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name); if (node && value) { - copy_dom_string(&node->data.attribute.value, value); + struct dom_string *str = &node->data.attribute.value; + + if (node->allocated) { + if (!init_dom_string(str, value->string, value->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(str, value); + } } return node; @@ -290,15 +304,21 @@ add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string, struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string); if (node && instruction) { - copy_dom_string(&node->data.proc_instruction.instruction, instruction); + struct dom_string *str = &node->data.proc_instruction.instruction; + + if (node->allocated) { + if (!init_dom_string(str, instruction->string, instruction->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(str, instruction); + } } return node; } -/* Removes the node and all its children and free()s itself */ -void done_dom_node(struct dom_node *node); - /* Compare two nodes returning non-zero if they differ. */ int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2); diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c index bda43c5c5..217b21151 100644 --- a/src/dom/sgml/parser.c +++ b/src/dom/sgml/parser.c @@ -35,11 +35,13 @@ * information like node subtypes and SGML parser state information. */ static inline struct dom_node * -add_sgml_document(struct dom_stack *stack, struct dom_string *string) +add_sgml_document(struct sgml_parser *parser) { - struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string); + int allocated = parser->flags & SGML_PARSER_INCREMENTAL; + struct dom_node *node; - if (node && push_dom_node(stack, node) == DOM_STACK_CODE_OK) + node = init_dom_node(DOM_NODE_DOCUMENT, &parser->uri, allocated); + if (node && push_dom_node(&parser->stack, node) == DOM_STACK_CODE_OK) return node; return NULL; @@ -74,7 +76,7 @@ add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token) } -static inline void +static inline struct dom_node * add_sgml_attribute(struct dom_stack *stack, struct dom_scanner_token *token, struct dom_scanner_token *valtoken) { @@ -96,9 +98,11 @@ add_sgml_attribute(struct dom_stack *stack, node->data.attribute.quoted = 1; if (!node || push_dom_node(stack, node) != DOM_STACK_CODE_OK) - return; + return NULL; pop_dom_node(stack); + + return node; } static inline struct dom_node * @@ -117,6 +121,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML; break; + case SGML_TOKEN_PROCESS_XML_STYLESHEET: + node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML_STYLESHEET; + break; + case SGML_TOKEN_PROCESS: default: node->data.proc_instruction.type = DOM_PROC_INSTRUCTION; @@ -128,19 +136,21 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar return NULL; } -static inline void +static inline struct dom_node * add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token) { struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *node = add_dom_node(parent, type, &token->string); - if (!node) return; + if (!node) return NULL; if (token->type == SGML_TOKEN_SPACE) node->data.text.only_space = 1; if (push_dom_node(stack, node) == DOM_STACK_CODE_OK) pop_dom_node(stack); + + return node; } @@ -157,18 +167,42 @@ call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *toke return parser->error_func(parser, &token->string, line); } +/* Appends to or 'creates' an incomplete token. This can be used to + * force tokens back into the 'stream' if they require that later tokens + * are available. + * + * NOTE: You can only do this for tokens that are not stripped of markup such + * as identifiers. */ +static enum sgml_parser_code +check_sgml_incomplete(struct dom_scanner *scanner, + struct dom_scanner_token *start, + struct dom_scanner_token *token) +{ + if (token && token->type == SGML_TOKEN_INCOMPLETE) { + token->string.length += token->string.string - start->string.string; + token->string.string = start->string.string; + return 1; + + } else if (!token && scanner->check_complete && scanner->incomplete) { + size_t left = scanner->end - start->string.string; + + assert(left > 0); + + token = scanner->current = scanner->table; + scanner->tokens = 1; + token->type = SGML_TOKEN_INCOMPLETE; + set_dom_string(&token->string, start->string.string, left); + return 1; + } + + return 0; +} + static inline enum sgml_parser_code parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) { struct dom_scanner_token name; - assert(dom_scanner_has_tokens(scanner) - && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN - || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION))); - - if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN) - skip_dom_scanner_token(scanner); - while (dom_scanner_has_tokens(scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(scanner); @@ -194,7 +228,7 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) /* If the token is not a valid value token * ignore it. */ token = get_next_dom_scanner_token(scanner); - if (token && token->type == SGML_TOKEN_INCOMPLETE) + if (check_sgml_incomplete(scanner, &name, token)) return SGML_PARSER_CODE_INCOMPLETE; if (token @@ -203,14 +237,15 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) && token->type != SGML_TOKEN_STRING) token = NULL; - } else if (token && token->type == SGML_TOKEN_INCOMPLETE) { + } else if (check_sgml_incomplete(scanner, &name, token)) { return SGML_PARSER_CODE_INCOMPLETE; } else { token = NULL; } - add_sgml_attribute(stack, &name, token); + if (!add_sgml_attribute(stack, &name, token)) + return SGML_PARSER_CODE_MEM_ALLOC; /* Skip the value token */ if (token) @@ -250,19 +285,14 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) switch (token->type) { case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT_BEGIN: - if (!add_sgml_element(stack, token)) { - if (token->type == SGML_TOKEN_ELEMENT) { - skip_dom_scanner_token(scanner); - break; - } - - skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END); - break; - } + if (!add_sgml_element(stack, token)) + return SGML_PARSER_CODE_MEM_ALLOC; if (token->type == SGML_TOKEN_ELEMENT_BEGIN) { enum sgml_parser_code code; + skip_dom_scanner_token(scanner); + code = parse_sgml_attributes(stack, scanner); if (code != SGML_PARSER_CODE_OK) return code; @@ -301,7 +331,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) break; case SGML_TOKEN_NOTATION_COMMENT: - add_sgml_node(stack, DOM_NODE_COMMENT, token); + if (!add_sgml_node(stack, DOM_NODE_COMMENT, token)) + return SGML_PARSER_CODE_MEM_ALLOC; skip_dom_scanner_token(scanner); break; @@ -314,7 +345,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) break; case SGML_TOKEN_CDATA_SECTION: - add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token); + if (!add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token)) + return SGML_PARSER_CODE_MEM_ALLOC; skip_dom_scanner_token(scanner); break; @@ -334,10 +366,10 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) assert(token->type == SGML_TOKEN_PROCESS_DATA); /* Fall-through */ - case SGML_TOKEN_PROCESS_DATA: - if (add_sgml_proc_instruction(stack, &target, token) - && (target.type == SGML_TOKEN_PROCESS_XML - || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET) + if (!add_sgml_proc_instruction(stack, &target, token)) + return SGML_PARSER_CODE_MEM_ALLOC; + if ((target.type == SGML_TOKEN_PROCESS_XML + || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET) && token->string.length > 0) { /* Parse the . */ struct dom_scanner attr_scanner; @@ -402,13 +434,13 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize, parser->flags |= SGML_PARSER_COMPLETE; if (!parser->root) { - parser->root = add_sgml_document(&parser->stack, &parser->uri); + parser->root = add_sgml_document(parser); if (!parser->root) return SGML_PARSER_CODE_MEM_ALLOC; get_dom_stack_top(&parser->stack)->immutable = 1; } - node = init_dom_node(DOM_NODE_TEXT, &source); + node = init_dom_node(DOM_NODE_TEXT, &source, 0); if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK) return SGML_PARSER_CODE_MEM_ALLOC; @@ -452,15 +484,19 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data) struct sgml_parsing_state *parent = &parsing[-1]; if (parent->resume) { - assert(is_dom_string_set(&parent->incomplete)); + if (is_dom_string_set(&parent->incomplete)) { - if (!add_to_dom_string(&parent->incomplete, - string->string, string->length)) { - parser->code = SGML_PARSER_CODE_MEM_ALLOC; - return DOM_STACK_CODE_OK; + if (!add_to_dom_string(&parent->incomplete, + string->string, + string->length)) { + + parser->code = SGML_PARSER_CODE_MEM_ALLOC; + return DOM_STACK_CODE_OK; + } + + string = &parent->incomplete; } - string = &parent->incomplete; scanner_state = parent->scanner.state; /* Pop down to the parent. */ @@ -474,19 +510,31 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data) scanner_state, count_lines, complete, incremental, detect_errors); - { - int immutable = get_dom_stack_top(&parser->stack)->immutable; - - get_dom_stack_top(&parser->stack)->immutable = 1; + if (scanner_state == SGML_STATE_ELEMENT) { + parser->code = parse_sgml_attributes(&parser->stack, &parsing->scanner); + if (parser->code == SGML_PARSER_CODE_OK) + parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner); + } else { parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner); - get_dom_stack_top(&parser->stack)->immutable = !!immutable; } - if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) { + if (complete) { pop_dom_node(&parser->parsing); return DOM_STACK_CODE_OK; } + if (parser->code != SGML_PARSER_CODE_INCOMPLETE) { + /* No need to preserve the default scanner state. */ + if (parsing->scanner.state == SGML_STATE_TEXT) { + pop_dom_node(&parser->parsing); + return DOM_STACK_CODE_OK; + } + + done_dom_string(&parsing->incomplete); + parsing->resume = 1; + return DOM_STACK_CODE_OK; + } + token = get_dom_scanner_token(&parsing->scanner); assert(token && token->type == SGML_TOKEN_INCOMPLETE); @@ -522,7 +570,7 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data) } /* It's bigger than when calling done_sgml_parser() in the middle of an * incomplete parsing. */ - assert(parsing->depth == parser->stack.depth); + assert(parsing->depth >= parser->stack.depth); } done_dom_string(&parsing->incomplete); diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c index f4a6578ac..e2e7c5db5 100644 --- a/src/dom/sgml/scanner.c +++ b/src/dom/sgml/scanner.c @@ -449,13 +449,16 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t if (scanner->state == SGML_STATE_ELEMENT) { /* Already inside an element so insert a tag end token * and continue scanning in next iteration. */ - string--; - real_length = 0; type = SGML_TOKEN_TAG_END; scanner_state = SGML_STATE_TEXT; /* We are creating a 'virtual' that has no source. */ possibly_incomplete = 0; + string = token->string.string; + real_length = 0; + + } else if (string == scanner->end) { + /* It is incomplete. */ } else if (is_sgml_ident(*string)) { token->string.string = string; @@ -540,6 +543,29 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t possibly_incomplete = 0; } + if (scanner->check_complete && scanner->incomplete) { + /* We need to fit both the process target token + * and the process data token into the scanner + * table. */ + if (token + 1 >= scanner->table + DOM_SCANNER_TOKENS) { + possibly_incomplete = 1; + + } else if (!possibly_incomplete) { + /* FIXME: We do this twice. */ + for (pos = string + 1; + (pos = skip_sgml_chars(scanner, pos, '>')); + pos++) { + if (pos[-1] == '?') + break; + } + if (!pos) + possibly_incomplete = 1; + } + + if (possibly_incomplete) + string = scanner->end; + } + } else if (*string == '/') { string++; skip_sgml_space(scanner, &string); @@ -641,6 +667,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t /* We found the end. */ possibly_incomplete = 0; + } else if (scanner->check_complete && scanner->incomplete) { + /* Force an incomplete token. */ + string = scanner->end; + } else if (is_sgml_attribute(*string)) { token->string.string++; scan_sgml_attribute(scanner, string); @@ -698,9 +728,9 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token { unsigned char *string = scanner->position; /* The length can be empty for '?>'. */ - size_t length = -1; + ssize_t length = -1; - token->string.string = string; + token->string.string = string++; /* Figure out where the processing instruction ends. This doesn't use * skip_sgml() since we MUST ignore precedence here to allow '<' inside diff --git a/src/dom/test/test-sgml-parser-basic b/src/dom/test/test-sgml-parser-basic index 1a22b7fed..f19f158eb 100755 --- a/src/dom/test/test-sgml-parser-basic +++ b/src/dom/test/test-sgml-parser-basic @@ -56,7 +56,7 @@ element: root #text: a' test_output_equals \ -'Parse tag soup elements.' \ +'Parse tag soup elements. (I)' \ '
Hello World!
' \ +test_incremental_parsing \ +"Parse a small document." \ +'Hello World!
' \ ' element: html element: body element: p #text: Hello World!' -done +test_incremental_parsing \ +'Parse elements.' \ +'