diff --git a/Makefile.lib b/Makefile.lib index f0219e101..b09537604 100644 --- a/Makefile.lib +++ b/Makefile.lib @@ -127,12 +127,12 @@ CLEAN += $(PROG) $(OBJS) ############################################################################# # The main default rules -all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) .vimrc +all-default: $(ALL_OBJS) $(PROGS) $(MAN1) $(MAN5) # Ensure that Makefiles in subdirs are created before we recursive into them init-recursive: init-default -init-default: .vimrc +init-default: @$(foreach subdir,$(sort $(SUBDIRS)), \ $(MKINSTALLDIRS) $(subdir) >/dev/null; \ test -e "$(subdir)/Makefile" \ @@ -164,11 +164,6 @@ ifdef MAN5 $(call ncmd,installdata,$(file),$(DESTDIR)$(mandir)/man5);) endif -.vimrc: $(top_srcdir)/Makefile.lib - @{ echo ':set runtimepath+=.'; \ - echo ':runtime $(top_srcdir)/config/vimrc'; \ - } > .vimrc - ############################################################################## # Auto-testing infrastructure diff --git a/config/vimrc b/config/vimrc deleted file mode 100644 index 59c5edc29..000000000 --- a/config/vimrc +++ /dev/null @@ -1,8 +0,0 @@ -" Master vimrc file for the ELinks project - -:set shiftwidth=8 -:set tabstop=8 -:set softtabstop=0 -:set noexpandtab - -au BufNewFile,BufRead *.inc setf c diff --git a/contrib/smjs/google_video.js b/contrib/smjs/google_video.js index f267941f1..46b5ee192 100644 --- a/contrib/smjs/google_video.js +++ b/contrib/smjs/google_video.js @@ -1,7 +1,7 @@ /* Play videos at video.google.com with minimal niggling. Just follow the link * from the front page or the search page, and the video will automatically * be loaded. */ -function load_google_video(cached) { +function load_google_video(cached, vs) { if (!cached.uri.match(/^http:\/\/video.google.com\/videoplay/)) return true; diff --git a/contrib/smjs/hooks.js b/contrib/smjs/hooks.js index 875904d32..0278e46f2 100644 --- a/contrib/smjs/hooks.js +++ b/contrib/smjs/hooks.js @@ -8,9 +8,9 @@ elinks.keymaps.main["@"] = function () { }; elinks.preformat_html_hooks = new Array(); -elinks.preformat_html = function (cached) { +elinks.preformat_html = function (cached, vs) { for (var i in elinks.preformat_html_hooks) - if (!elinks.preformat_html_hooks[i](cached)) + if (!elinks.preformat_html_hooks[i](cached, vs)) return false; return true; @@ -36,13 +36,13 @@ elinks.follow_url_hook = function (url) { return url; }; -function root_w00t(cached) { +function root_w00t(cached, vs) { cached.content = cached.content.replace(/root/g, "w00t"); return true; }; elinks.preformat_html_hooks.push(root_w00t); -function mangle_deb_bugnumbers(cached) { +function mangle_deb_bugnumbers(cached, vs) { if (!cached.uri.match(/^[a-z0-9]+:\/\/[a-z0-9A-Z.-]+debian\.org/) && !cached.uri.match(/changelog\.Debian/)) return true; @@ -55,7 +55,14 @@ function mangle_deb_bugnumbers(cached) { /* Debian Policy Manual 4.4 footnote 16 */ var closes_re = /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/gi; - cached.content = cached.content.replace(closes_re, rewrite_closes_fn); + var new_content = cached.content.replace(closes_re, rewrite_closes_fn); + if (cached.content_type == 'text/plain') { + cached.content = '
' + new_content + '
'; + vs.plain = "0"; + } else { + cached.content = new_content; + } + return true; } diff --git a/contrib/vim/c_elinks.vim b/contrib/vim/c_elinks.vim new file mode 100644 index 000000000..51af01728 --- /dev/null +++ b/contrib/vim/c_elinks.vim @@ -0,0 +1,15 @@ +" Setting Vim to support the ELinks coding style +" +" To use this file, drop it in ~/.vim/ftplugin and set filetype plugin on. +" Finally, make sure the path to the source directory contains the word +" 'elinks', for example ~/src/elinks/. +" +" For .h files, link it as cpp_elinks.vim or define c_syntax_for_h in ~/.vimrc. +" For .inc files, let g:filetype_inc = 'c' in ~/.vimrc. + +if expand('%:p:h') =~ '.*elinks.*' + setlocal shiftwidth=8 + setlocal tabstop=8 + setlocal softtabstop=0 + setlocal noexpandtab +endif diff --git a/src/document/dom/renderer.c b/src/document/dom/renderer.c index 131f30eb6..451e21fb0 100644 --- a/src/document/dom/renderer.c +++ b/src/document/dom/renderer.c @@ -1024,7 +1024,7 @@ render_dom_document(struct cache_entry *cached, struct document *document, } else if (renderer.doctype == SGML_DOCTYPE_RSS) { add_dom_stack_context(&parser->stack, &renderer, &dom_rss_renderer_context_info); - add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); + add_dom_config_normalizer(&parser->stack, RSS_CONFIG_FLAGS); } /* FIXME: When rendering this way we don't really care about the code. diff --git a/src/dom/configuration.c b/src/dom/configuration.c index fb8f8c921..b228f02e8 100644 --- a/src/dom/configuration.c +++ b/src/dom/configuration.c @@ -44,11 +44,11 @@ normalize_text_node_whitespace(struct dom_node *node) } } - if (node->data.text.allocated) + if (node->allocated) done_dom_string(&node->string); set_dom_string(&node->string, string.string, string.length); - node->data.text.allocated = 1; + node->allocated = 1; return DOM_STACK_CODE_OK; @@ -74,14 +74,14 @@ append_node_text(struct dom_config *config, struct dom_node *node) set_dom_string(&dest, NULL, 0); } else { - if (prev->data.text.allocated) { + if (prev->allocated) { copy_struct(&dest, &prev->string); } else { set_dom_string(&dest, NULL, 0); if (!add_to_dom_string(&dest, prev->string.string, prev->string.length)) return DOM_STACK_CODE_ERROR_MEM_ALLOC; set_dom_string(&prev->string, dest.string, dest.length); - prev->data.text.allocated = 1; + prev->allocated = 1; } } @@ -135,7 +135,7 @@ append_node_text(struct dom_config *config, struct dom_node *node) node->type = DOM_NODE_TEXT; memset(&node->data, 0, sizeof(node->data)); - node->data.text.allocated = 1; + node->allocated = 1; copy_struct(&node->string, &dest); if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE) diff --git a/src/dom/configuration.h b/src/dom/configuration.h index f323c386f..ad0bdb319 100644 --- a/src/dom/configuration.h +++ b/src/dom/configuration.h @@ -7,7 +7,7 @@ struct dom_stack; /* API Doc :: dom-config */ /** DOM Configuration - * + * * The DOMConfiguration interface represents the configuration of a document. * Using the configuration, it is possible to change the behaviour of how * document normalization is done, such as replacing the CDATASection nodes @@ -33,7 +33,7 @@ enum dom_config_flag { DOM_CONFIG_COMMENTS = 2, /** "element-content-whitespace" - * + * * The default is true and will keep all whitespaces in the document. * When false, discard all Text nodes that contain only whitespaces. */ DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE = 4, diff --git a/src/dom/node.c b/src/dom/node.c index 9995c2230..d0ac956ae 100644 --- a/src/dom/node.c +++ b/src/dom/node.c @@ -202,7 +202,7 @@ struct dom_node * get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type, uint16_t subtype, struct dom_string *name) { - struct dom_node node = { type, INIT_DOM_STRING(name->string, name->length) }; + struct dom_node node = { type, 0, INIT_DOM_STRING(name->string, name->length) }; struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, list); if (subtype) { @@ -314,7 +314,7 @@ get_dom_node_child(struct dom_node *parent, enum dom_node_type type, struct dom_node * init_dom_node_(unsigned char *file, int line, struct dom_node *parent, enum dom_node_type type, - struct dom_string *string) + struct dom_string *string, int allocated) { #ifdef DEBUG_MEMLEAK struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node)); @@ -326,7 +326,6 @@ init_dom_node_(unsigned char *file, int line, node->type = type; node->parent = parent; - copy_dom_string(&node->string, string); if (parent) { struct dom_node_list **list = get_dom_node_list(parent, node); @@ -343,6 +342,22 @@ init_dom_node_(unsigned char *file, int line, done_dom_node(node); return NULL; } + + /* Make it possible to add a node to a parent without + * allocating the strings. */ + node->allocated = allocated < 0 ? parent->allocated : !!allocated; + + } else if (allocated >= 0) { + node->allocated = !!allocated; + } + + if (node->allocated) { + if (!init_dom_string(&node->string, string->string, string->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(&node->string, string); } return node; @@ -359,8 +374,8 @@ done_dom_node_data(struct dom_node *node) switch (node->type) { case DOM_NODE_ATTRIBUTE: - if (data->attribute.allocated) - done_dom_string(&node->string); + if (node->allocated) + done_dom_string(&data->attribute.value); break; case DOM_NODE_DOCUMENT: @@ -382,20 +397,19 @@ done_dom_node_data(struct dom_node *node) done_dom_node_list(data->element.map); break; - case DOM_NODE_TEXT: - if (data->text.allocated) - done_dom_string(&node->string); - break; - case DOM_NODE_PROCESSING_INSTRUCTION: if (data->proc_instruction.map) done_dom_node_list(data->proc_instruction.map); + if (node->allocated) + done_dom_string(&data->proc_instruction.instruction); break; default: break; } + if (node->allocated) + done_dom_string(&node->string); mem_free(node); } diff --git a/src/dom/node.h b/src/dom/node.h index 9f4d2cfbd..32948ea15 100644 --- a/src/dom/node.h +++ b/src/dom/node.h @@ -115,9 +115,6 @@ struct dom_attribute_node { * it added from the document source. */ unsigned int specified:1; - /* Was the node->string allocated */ - unsigned int allocated:1; - /* Has the node->string been converted to internal charset. */ unsigned int converted:1; @@ -140,9 +137,6 @@ struct dom_text_node { * In order to quickly identify such nodes this member is used. */ unsigned int only_space:1; - /* Was the node->string allocated */ - unsigned int allocated:1; - /* Has the node->string been converted to internal charset. */ unsigned int converted:1; }; @@ -151,9 +145,8 @@ enum dom_proc_instruction_type { DOM_PROC_INSTRUCTION, /* Keep this group sorted */ - DOM_PROC_INSTRUCTION_DBHTML, /* DocBook toolchain instruction */ - DOM_PROC_INSTRUCTION_ELINKS, /* Internal instruction hook */ - DOM_PROC_INSTRUCTION_XML, /* XML instructions */ + DOM_PROC_INSTRUCTION_XML, /* XML header */ + DOM_PROC_INSTRUCTION_XML_STYLESHEET, /* XML stylesheet link */ DOM_PROC_INSTRUCTION_TYPES }; @@ -198,6 +191,9 @@ struct dom_node { /* The type of the node */ uint16_t type; /* -> enum dom_node_type */ + /* Was the node string allocated? */ + unsigned int allocated:1; + /* Can contain either stuff like element name or for attributes the * attribute name. */ struct dom_string string; @@ -260,12 +256,21 @@ get_dom_node_map_entry(struct dom_node_list *node_map, enum dom_node_type type, uint16_t subtype, struct dom_string *name); +/* Removes the node and all its children and free()s itself */ +void done_dom_node(struct dom_node *node); + +/* The allocated argument is used as the value of node->allocated if >= 0. + * Use -1 to default node->allocated to the value of parent->allocated. */ struct dom_node * init_dom_node_(unsigned char *file, int line, struct dom_node *parent, enum dom_node_type type, - struct dom_string *string); -#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string) -#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string) + struct dom_string *string, int allocated); + +#define init_dom_node(type, string, allocated) \ + init_dom_node_(__FILE__, __LINE__, NULL, type, string, allocated) + +#define add_dom_node(parent, type, string) \ + init_dom_node_(__FILE__, __LINE__, parent, type, string, -1) #define add_dom_element(parent, string) \ add_dom_node(parent, DOM_NODE_ELEMENT, string) @@ -277,7 +282,16 @@ add_dom_attribute(struct dom_node *parent, struct dom_string *name, struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name); if (node && value) { - copy_dom_string(&node->data.attribute.value, value); + struct dom_string *str = &node->data.attribute.value; + + if (node->allocated) { + if (!init_dom_string(str, value->string, value->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(str, value); + } } return node; @@ -290,15 +304,21 @@ add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string, struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string); if (node && instruction) { - copy_dom_string(&node->data.proc_instruction.instruction, instruction); + struct dom_string *str = &node->data.proc_instruction.instruction; + + if (node->allocated) { + if (!init_dom_string(str, instruction->string, instruction->length)) { + done_dom_node(node); + return NULL; + } + } else { + copy_dom_string(str, instruction); + } } return node; } -/* Removes the node and all its children and free()s itself */ -void done_dom_node(struct dom_node *node); - /* Compare two nodes returning non-zero if they differ. */ int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2); diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c index bda43c5c5..217b21151 100644 --- a/src/dom/sgml/parser.c +++ b/src/dom/sgml/parser.c @@ -35,11 +35,13 @@ * information like node subtypes and SGML parser state information. */ static inline struct dom_node * -add_sgml_document(struct dom_stack *stack, struct dom_string *string) +add_sgml_document(struct sgml_parser *parser) { - struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string); + int allocated = parser->flags & SGML_PARSER_INCREMENTAL; + struct dom_node *node; - if (node && push_dom_node(stack, node) == DOM_STACK_CODE_OK) + node = init_dom_node(DOM_NODE_DOCUMENT, &parser->uri, allocated); + if (node && push_dom_node(&parser->stack, node) == DOM_STACK_CODE_OK) return node; return NULL; @@ -74,7 +76,7 @@ add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token) } -static inline void +static inline struct dom_node * add_sgml_attribute(struct dom_stack *stack, struct dom_scanner_token *token, struct dom_scanner_token *valtoken) { @@ -96,9 +98,11 @@ add_sgml_attribute(struct dom_stack *stack, node->data.attribute.quoted = 1; if (!node || push_dom_node(stack, node) != DOM_STACK_CODE_OK) - return; + return NULL; pop_dom_node(stack); + + return node; } static inline struct dom_node * @@ -117,6 +121,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML; break; + case SGML_TOKEN_PROCESS_XML_STYLESHEET: + node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML_STYLESHEET; + break; + case SGML_TOKEN_PROCESS: default: node->data.proc_instruction.type = DOM_PROC_INSTRUCTION; @@ -128,19 +136,21 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *tar return NULL; } -static inline void +static inline struct dom_node * add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token) { struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *node = add_dom_node(parent, type, &token->string); - if (!node) return; + if (!node) return NULL; if (token->type == SGML_TOKEN_SPACE) node->data.text.only_space = 1; if (push_dom_node(stack, node) == DOM_STACK_CODE_OK) pop_dom_node(stack); + + return node; } @@ -157,18 +167,42 @@ call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *toke return parser->error_func(parser, &token->string, line); } +/* Appends to or 'creates' an incomplete token. This can be used to + * force tokens back into the 'stream' if they require that later tokens + * are available. + * + * NOTE: You can only do this for tokens that are not stripped of markup such + * as identifiers. */ +static enum sgml_parser_code +check_sgml_incomplete(struct dom_scanner *scanner, + struct dom_scanner_token *start, + struct dom_scanner_token *token) +{ + if (token && token->type == SGML_TOKEN_INCOMPLETE) { + token->string.length += token->string.string - start->string.string; + token->string.string = start->string.string; + return 1; + + } else if (!token && scanner->check_complete && scanner->incomplete) { + size_t left = scanner->end - start->string.string; + + assert(left > 0); + + token = scanner->current = scanner->table; + scanner->tokens = 1; + token->type = SGML_TOKEN_INCOMPLETE; + set_dom_string(&token->string, start->string.string, left); + return 1; + } + + return 0; +} + static inline enum sgml_parser_code parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) { struct dom_scanner_token name; - assert(dom_scanner_has_tokens(scanner) - && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN - || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION))); - - if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN) - skip_dom_scanner_token(scanner); - while (dom_scanner_has_tokens(scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(scanner); @@ -194,7 +228,7 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) /* If the token is not a valid value token * ignore it. */ token = get_next_dom_scanner_token(scanner); - if (token && token->type == SGML_TOKEN_INCOMPLETE) + if (check_sgml_incomplete(scanner, &name, token)) return SGML_PARSER_CODE_INCOMPLETE; if (token @@ -203,14 +237,15 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) && token->type != SGML_TOKEN_STRING) token = NULL; - } else if (token && token->type == SGML_TOKEN_INCOMPLETE) { + } else if (check_sgml_incomplete(scanner, &name, token)) { return SGML_PARSER_CODE_INCOMPLETE; } else { token = NULL; } - add_sgml_attribute(stack, &name, token); + if (!add_sgml_attribute(stack, &name, token)) + return SGML_PARSER_CODE_MEM_ALLOC; /* Skip the value token */ if (token) @@ -250,19 +285,14 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) switch (token->type) { case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT_BEGIN: - if (!add_sgml_element(stack, token)) { - if (token->type == SGML_TOKEN_ELEMENT) { - skip_dom_scanner_token(scanner); - break; - } - - skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END); - break; - } + if (!add_sgml_element(stack, token)) + return SGML_PARSER_CODE_MEM_ALLOC; if (token->type == SGML_TOKEN_ELEMENT_BEGIN) { enum sgml_parser_code code; + skip_dom_scanner_token(scanner); + code = parse_sgml_attributes(stack, scanner); if (code != SGML_PARSER_CODE_OK) return code; @@ -301,7 +331,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) break; case SGML_TOKEN_NOTATION_COMMENT: - add_sgml_node(stack, DOM_NODE_COMMENT, token); + if (!add_sgml_node(stack, DOM_NODE_COMMENT, token)) + return SGML_PARSER_CODE_MEM_ALLOC; skip_dom_scanner_token(scanner); break; @@ -314,7 +345,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) break; case SGML_TOKEN_CDATA_SECTION: - add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token); + if (!add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token)) + return SGML_PARSER_CODE_MEM_ALLOC; skip_dom_scanner_token(scanner); break; @@ -334,10 +366,10 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) assert(token->type == SGML_TOKEN_PROCESS_DATA); /* Fall-through */ - case SGML_TOKEN_PROCESS_DATA: - if (add_sgml_proc_instruction(stack, &target, token) - && (target.type == SGML_TOKEN_PROCESS_XML - || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET) + if (!add_sgml_proc_instruction(stack, &target, token)) + return SGML_PARSER_CODE_MEM_ALLOC; + if ((target.type == SGML_TOKEN_PROCESS_XML + || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET) && token->string.length > 0) { /* Parse the . */ struct dom_scanner attr_scanner; @@ -402,13 +434,13 @@ parse_sgml(struct sgml_parser *parser, unsigned char *buf, size_t bufsize, parser->flags |= SGML_PARSER_COMPLETE; if (!parser->root) { - parser->root = add_sgml_document(&parser->stack, &parser->uri); + parser->root = add_sgml_document(parser); if (!parser->root) return SGML_PARSER_CODE_MEM_ALLOC; get_dom_stack_top(&parser->stack)->immutable = 1; } - node = init_dom_node(DOM_NODE_TEXT, &source); + node = init_dom_node(DOM_NODE_TEXT, &source, 0); if (!node || push_dom_node(&parser->parsing, node) != DOM_STACK_CODE_OK) return SGML_PARSER_CODE_MEM_ALLOC; @@ -452,15 +484,19 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data) struct sgml_parsing_state *parent = &parsing[-1]; if (parent->resume) { - assert(is_dom_string_set(&parent->incomplete)); + if (is_dom_string_set(&parent->incomplete)) { - if (!add_to_dom_string(&parent->incomplete, - string->string, string->length)) { - parser->code = SGML_PARSER_CODE_MEM_ALLOC; - return DOM_STACK_CODE_OK; + if (!add_to_dom_string(&parent->incomplete, + string->string, + string->length)) { + + parser->code = SGML_PARSER_CODE_MEM_ALLOC; + return DOM_STACK_CODE_OK; + } + + string = &parent->incomplete; } - string = &parent->incomplete; scanner_state = parent->scanner.state; /* Pop down to the parent. */ @@ -474,19 +510,31 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data) scanner_state, count_lines, complete, incremental, detect_errors); - { - int immutable = get_dom_stack_top(&parser->stack)->immutable; - - get_dom_stack_top(&parser->stack)->immutable = 1; + if (scanner_state == SGML_STATE_ELEMENT) { + parser->code = parse_sgml_attributes(&parser->stack, &parsing->scanner); + if (parser->code == SGML_PARSER_CODE_OK) + parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner); + } else { parser->code = parse_sgml_plain(&parser->stack, &parsing->scanner); - get_dom_stack_top(&parser->stack)->immutable = !!immutable; } - if (complete || parser->code != SGML_PARSER_CODE_INCOMPLETE) { + if (complete) { pop_dom_node(&parser->parsing); return DOM_STACK_CODE_OK; } + if (parser->code != SGML_PARSER_CODE_INCOMPLETE) { + /* No need to preserve the default scanner state. */ + if (parsing->scanner.state == SGML_STATE_TEXT) { + pop_dom_node(&parser->parsing); + return DOM_STACK_CODE_OK; + } + + done_dom_string(&parsing->incomplete); + parsing->resume = 1; + return DOM_STACK_CODE_OK; + } + token = get_dom_scanner_token(&parsing->scanner); assert(token && token->type == SGML_TOKEN_INCOMPLETE); @@ -522,7 +570,7 @@ sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data) } /* It's bigger than when calling done_sgml_parser() in the middle of an * incomplete parsing. */ - assert(parsing->depth == parser->stack.depth); + assert(parsing->depth >= parser->stack.depth); } done_dom_string(&parsing->incomplete); diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c index f4a6578ac..e2e7c5db5 100644 --- a/src/dom/sgml/scanner.c +++ b/src/dom/sgml/scanner.c @@ -449,13 +449,16 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t if (scanner->state == SGML_STATE_ELEMENT) { /* Already inside an element so insert a tag end token * and continue scanning in next iteration. */ - string--; - real_length = 0; type = SGML_TOKEN_TAG_END; scanner_state = SGML_STATE_TEXT; /* We are creating a 'virtual' that has no source. */ possibly_incomplete = 0; + string = token->string.string; + real_length = 0; + + } else if (string == scanner->end) { + /* It is incomplete. */ } else if (is_sgml_ident(*string)) { token->string.string = string; @@ -540,6 +543,29 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t possibly_incomplete = 0; } + if (scanner->check_complete && scanner->incomplete) { + /* We need to fit both the process target token + * and the process data token into the scanner + * table. */ + if (token + 1 >= scanner->table + DOM_SCANNER_TOKENS) { + possibly_incomplete = 1; + + } else if (!possibly_incomplete) { + /* FIXME: We do this twice. */ + for (pos = string + 1; + (pos = skip_sgml_chars(scanner, pos, '>')); + pos++) { + if (pos[-1] == '?') + break; + } + if (!pos) + possibly_incomplete = 1; + } + + if (possibly_incomplete) + string = scanner->end; + } + } else if (*string == '/') { string++; skip_sgml_space(scanner, &string); @@ -641,6 +667,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t /* We found the end. */ possibly_incomplete = 0; + } else if (scanner->check_complete && scanner->incomplete) { + /* Force an incomplete token. */ + string = scanner->end; + } else if (is_sgml_attribute(*string)) { token->string.string++; scan_sgml_attribute(scanner, string); @@ -698,9 +728,9 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token { unsigned char *string = scanner->position; /* The length can be empty for ''. */ - size_t length = -1; + ssize_t length = -1; - token->string.string = string; + token->string.string = string++; /* Figure out where the processing instruction ends. This doesn't use * skip_sgml() since we MUST ignore precedence here to allow '<' inside diff --git a/src/dom/test/test-sgml-parser-basic b/src/dom/test/test-sgml-parser-basic index 1a22b7fed..f19f158eb 100755 --- a/src/dom/test/test-sgml-parser-basic +++ b/src/dom/test/test-sgml-parser-basic @@ -56,7 +56,7 @@ element: root #text: a' test_output_equals \ -'Parse tag soup elements.' \ +'Parse tag soup elements. (I)' \ 'a' \ ' element: parent @@ -65,6 +65,14 @@ element: parent element: child:2 #text: a' +test_output_equals \ +'Parse tag soup elements. (II)' \ +'< a >< b < c / >< / >' \ +' +element: a + element: b + element: c' + test_output_equals \ 'Parse an enclosed comment.' \ '' \ diff --git a/src/dom/test/test-sgml-parser-incremental b/src/dom/test/test-sgml-parser-incremental index a9896e52b..aa6ea64b1 100755 --- a/src/dom/test/test-sgml-parser-incremental +++ b/src/dom/test/test-sgml-parser-incremental @@ -11,9 +11,8 @@ parsing. . "$TEST_LIB" -test_output_equals () { +test_incremental_parsing () { desc="$1"; shift - size="$1"; shift src="$1"; shift out="$1"; shift @@ -25,25 +24,242 @@ test_output_equals () { y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/; s/[^a-zA-Z0-9-]//g;')" - echo "$src" | sgml-parser --uri "$URI" --stdin "$size" \ - | sed -e 's/^ //' | sed -n '$d;p' > output echo "#document: $URI" > expected echo "$out" | sed -n '2,$p' >> expected - test_expect_success "$desc" 'cmp output expected' + for size in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do + echo -n "$src" | sgml-parser --uri "$URI" --stdin "$size" \ + | sed -e 's/^ //' > output + + test_run_ 'cmp output expected' + if [ "$?" != 0 -o "$eval_ret" != 0 ] + then + test_failure_ "$desc" "($size bytes)" + return + fi + done + + test_ok_ "$desc" } -for i in 25 20 15 10 9 8 7 6 5 4 3 2 1; do - test_output_equals \ - "Incrementally parse a small document reading $i bytes at a time." \ - "$i" \ - '

Hello World!

' \ +test_incremental_parsing \ +"Parse a small document." \ +'

Hello World!

' \ ' element: html element: body element: p #text: Hello World!' -done +test_incremental_parsing \ +'Parse elements.' \ +'a' \ +' +element: root + element: child + attribute: attr -> value + element: child2 + element: child3 + #text: a' + +test_incremental_parsing \ +'Parse tag soup elements.' \ +'a' \ +' +element: parent + attribute: attr -> value + element: child:1 + element: child:2 + #text: a' + +test_incremental_parsing \ +'Parse an enclosed comment.' \ +'' \ +' +element: root + #comment: Hello World! ' + +test_incremental_parsing \ +'Parse comment combinations. (I)' \ +'' \ +' +element: root + #comment: s-->tu' \ +' +#comment: comment +#text: s +#comment: > +#text: t +#comment: - +#text: u' + +test_incremental_parsing \ +'Parse bad comment. (I)' \ +'s' \ +' +#comment: ->s' + +test_incremental_parsing \ +'Parse bad comment. (II)' \ +'bad comment' \ +' +#comment: a +#text: bad comment' + +test_incremental_parsing \ +'Parse empty notation.' \ +'s' \ +' +#text: s' + +test_incremental_parsing \ +'Parse an enclosed CDATA section.' \ +'...]]>' \ +' +element: root + #cdata-section: ...] ]>...' + +test_incremental_parsing \ +'Parse non-enclosed CDATA section.' \ +'' \ +' +#cdata-section: ...' + +test_incremental_parsing \ +'Parse a bad CDATA section.' \ +'' \ +' +element: root + attribute: lang -> fr + attribute: attr -> + attribute: name -> value with &foo; " \ +' +element: root + attribute: a -> b + attribute: c -> d + attribute: g -> h + attribute: i -> j + attribute: k -> ' + +test_incremental_parsing \ +'Parse attribute with non-quoted values.' \ +'...' \ +' +element: root + attribute: color -> #abc + attribute: path -> /to/%61-&\one";files +#text: ...' + +test_incremental_parsing \ +'Parse entity references.' \ +'&-*' \ +' +entity-reference: amp +#text: - +entity-reference: #42' + +# Just how these should be gracefully handled is not clear to me. +test_incremental_parsing \ +'Parse badly formatted entity references.' \ +'& m33p;-&.:-copy;-&;-&#;-&#xx;' \ +' +#text: & m33p; +#text: - +entity-reference: .:-copy +#text: - +#text: &; +#text: - +entity-reference: # +#text: - +entity-reference: #xx' + +test_incremental_parsing \ +'Parse processing instructions.' \ +' +... +' \ +' +proc-instruction: xml -> encoding="UTF8" + attribute: encoding -> UTF8 +#text: \n...\n +proc-instruction: ecmascript -> var val=2;\n' + +test_incremental_parsing \ +'Parse XML processing instructions.' \ +'?>-' \ +' +proc-instruction: xml -> version="1.0" /> + attribute: version -> 1.0 +proc-instruction: xml -> />-' + +test_incremental_parsing \ +'Parse XML stylesheet processing instructions.' \ +'' \ +' +proc-instruction: xml-stylesheet -> type="text/xsl" href="url" + attribute: type -> text/xsl + attribute: href -> url' + +test_incremental_parsing \ +'Parse exotic processing instructions.' \ +'+?>-?>---' \ +' +proc-instruction: xml -> ?+>+ +#text: -?>- +proc-instruction: js -> +#text: - +proc-instruction: -> +#text: -' + +test_incremental_parsing \ +'Parse incorrect processing instructions.' \ +'--- < +#text: - +proc-instruction: -> <=";& +#text: -' + +test_incremental_parsing \ +'Parse incorrect processing instructions (II).' \ +' >< / root >' \ +' +element: root + attribute: ns:attr -> value + proc-instruction: target -> data' + test_done diff --git a/src/protocol/fsp/fsp.c b/src/protocol/fsp/fsp.c index d343d8e11..4696657ce 100644 --- a/src/protocol/fsp/fsp.c +++ b/src/protocol/fsp/fsp.c @@ -296,7 +296,7 @@ end: abort_connection(conn, S_OUT_OF_MEM); return; } - read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data); + read_from_socket(conn->data_socket, buf, S_CONN, fsp_got_data); } #undef READ_SIZE diff --git a/src/scripting/smjs/Makefile b/src/scripting/smjs/Makefile index 7ff12a144..1689f4789 100644 --- a/src/scripting/smjs/Makefile +++ b/src/scripting/smjs/Makefile @@ -4,6 +4,6 @@ include $(top_builddir)/Makefile.config INCLUDES += $(SPIDERMONKEY_CFLAGS) OBJS = smjs.o core.o global_object.o hooks.o elinks_object.o cache_object.o \ - bookmarks.o keybinding.o + view_state_object.o bookmarks.o keybinding.o include $(top_srcdir)/Makefile.lib diff --git a/src/scripting/smjs/hooks.c b/src/scripting/smjs/hooks.c index 91a768a41..03097ef01 100644 --- a/src/scripting/smjs/hooks.c +++ b/src/scripting/smjs/hooks.c @@ -12,10 +12,13 @@ #include "main/event.h" #include "main/module.h" #include "scripting/smjs/cache_object.h" +#include "scripting/smjs/view_state_object.h" #include "scripting/smjs/core.h" #include "scripting/smjs/elinks_object.h" #include "scripting/smjs/hooks.h" +#include "session/location.h" #include "session/session.h" +#include "viewer/text/vs.h" static enum evhook_status @@ -88,8 +91,8 @@ script_hook_pre_format_html(va_list ap, void *data) struct session *ses = va_arg(ap, struct session *); struct cache_entry *cached = va_arg(ap, struct cache_entry *); enum evhook_status ret = EVENT_HOOK_STATUS_NEXT; - JSObject *cache_entry_object; - jsval args[1], rval; + JSObject *cache_entry_object, *view_state_object = JSVAL_NULL; + jsval args[2], rval; evhook_use_params(ses && cached); @@ -97,13 +100,20 @@ script_hook_pre_format_html(va_list ap, void *data) smjs_ses = ses; + if (have_location(ses)) { + struct view_state *vs = &cur_loc(ses)->vs; + + view_state_object = smjs_get_view_state_object(vs); + } + cache_entry_object = smjs_get_cache_entry_object(cached); if (!cache_entry_object) goto end; args[0] = OBJECT_TO_JSVAL(cache_entry_object); + args[1] = OBJECT_TO_JSVAL(view_state_object); if (JS_TRUE == smjs_invoke_elinks_object_method("preformat_html", - args, 1, &rval)) + args, 2, &rval)) if (JS_FALSE == JSVAL_TO_BOOLEAN(rval)) ret = EVENT_HOOK_STATUS_LAST; diff --git a/src/scripting/smjs/view_state_object.c b/src/scripting/smjs/view_state_object.c new file mode 100644 index 000000000..a40457e76 --- /dev/null +++ b/src/scripting/smjs/view_state_object.c @@ -0,0 +1,109 @@ +/* Exports struct view_state to the world of ECMAScript */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "elinks.h" + +#include "ecmascript/spidermonkey/util.h" +#include "protocol/uri.h" +#include "scripting/smjs/view_state_object.h" +#include "scripting/smjs/core.h" +#include "util/error.h" +#include "util/memory.h" +#include "viewer/text/vs.h" + +enum view_state_prop { + VIEW_STATE_PLAIN, + VIEW_STATE_URI, +}; + +static const JSPropertySpec view_state_props[] = { + { "plain", VIEW_STATE_PLAIN, JSPROP_ENUMERATE }, + { "uri", VIEW_STATE_URI, JSPROP_ENUMERATE | JSPROP_READONLY }, + { NULL } +}; + +static JSBool +view_state_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp) +{ + struct view_state *vs = JS_GetPrivate(ctx, obj); + + undef_to_jsval(ctx, vp); + + if (!JSVAL_IS_INT(id)) + return JS_FALSE; + + switch (JSVAL_TO_INT(id)) { + case VIEW_STATE_PLAIN: + *vp = INT_TO_JSVAL(vs->plain); + + return JS_TRUE; + case VIEW_STATE_URI: + *vp = STRING_TO_JSVAL(JS_NewStringCopyZ(smjs_ctx, + struri(vs->uri))); + + return JS_TRUE; + default: + INTERNAL("Invalid ID %d in view_state_get_property().", + JSVAL_TO_INT(id)); + } + + return JS_FALSE; +} + +static JSBool +view_state_set_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp) +{ + struct view_state *vs = JS_GetPrivate(ctx, obj); + + if (!JSVAL_IS_INT(id)) + return JS_FALSE; + + switch (JSVAL_TO_INT(id)) { + case VIEW_STATE_PLAIN: { + vs->plain = atol(jsval_to_string(ctx, vp)); + + return JS_TRUE; + } + default: + INTERNAL("Invalid ID %d in view_state_set_property().", + JSVAL_TO_INT(id)); + } + + return JS_FALSE; +} + +static const JSClass view_state_class = { + "view_state", + JSCLASS_HAS_PRIVATE, + JS_PropertyStub, JS_PropertyStub, + view_state_get_property, view_state_set_property, + JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, JS_FinalizeStub +}; + +JSObject * +smjs_get_view_state_object(struct view_state *vs) +{ + JSObject *view_state_object; + + assert(smjs_ctx); + + view_state_object = JS_NewObject(smjs_ctx, + (JSClass *) &view_state_class, + NULL, NULL); + + if (!view_state_object) return NULL; + + if (JS_FALSE == JS_SetPrivate(smjs_ctx, view_state_object, vs)) + return NULL; + + if (JS_FALSE == JS_DefineProperties(smjs_ctx, view_state_object, + (JSPropertySpec *) view_state_props)) + return NULL; + + return view_state_object; +} diff --git a/src/scripting/smjs/view_state_object.h b/src/scripting/smjs/view_state_object.h new file mode 100644 index 000000000..daf770eb6 --- /dev/null +++ b/src/scripting/smjs/view_state_object.h @@ -0,0 +1,9 @@ +#ifndef EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H +#define EL__SCRIPTING_SMJS_VIEW_STATE_OBJECT_H + +struct view_state; + +JSObject *smjs_get_view_state_object(struct view_state *vs); + +#endif +