diff --git a/AUTHORS b/AUTHORS index cf076cf8d..d63f61274 100644 --- a/AUTHORS +++ b/AUTHORS @@ -161,6 +161,9 @@ Doug Kearns Edwin Groothuis Dump-width option +Eric Wald + Vim ftplugin to set ELinks coding style + Evan Hughes Bookmarks diff --git a/src/document/dom/renderer.c b/src/document/dom/renderer.c index 8f5cd1a2f..8d391e181 100644 --- a/src/document/dom/renderer.c +++ b/src/document/dom/renderer.c @@ -507,7 +507,7 @@ render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *no } #endif -enum dom_stack_code +static enum dom_stack_code render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data) { struct dom_renderer *renderer = stack->current->data; diff --git a/src/document/plain/renderer.c b/src/document/plain/renderer.c index 7adf15fac..c17785eb9 100644 --- a/src/document/plain/renderer.c +++ b/src/document/plain/renderer.c @@ -135,6 +135,10 @@ check_link_word(struct document *document, unsigned char *uri, int length, if (!where) return NULL; + /* We need to reparse the URI and normalize it so that the protocol and + * host part are converted to lowercase. 
*/ + normalize_uri(NULL, where); + new_link = add_document_link(document, where, length, x, y); if (!new_link) mem_free(where); diff --git a/src/dom/configuration.c b/src/dom/configuration.c index b228f02e8..08af021df 100644 --- a/src/dom/configuration.c +++ b/src/dom/configuration.c @@ -304,7 +304,7 @@ struct dom_config_info { }; #define DOM_CONFIG(name, flag) \ - { INIT_DOM_STRING(name, -1), (flag) } + { STATIC_DOM_STRING(name), (flag) } static struct dom_config_info dom_config_info[] = { DOM_CONFIG("cdata-sections", DOM_CONFIG_CDATA_SECTIONS), diff --git a/src/dom/css/scanner.c b/src/dom/css/scanner.c index 48054f59f..a0e46e796 100644 --- a/src/dom/css/scanner.c +++ b/src/dom/css/scanner.c @@ -55,7 +55,7 @@ static const struct dom_scan_table_info css_scan_table_info[] = { }; #define CSS_STRING_MAP(str, type, family) \ - { INIT_DOM_STRING(str, -1), CSS_TOKEN_##type, CSS_TOKEN_##family } + { STATIC_DOM_STRING(str), CSS_TOKEN_##type, CSS_TOKEN_##family } static const struct dom_scanner_string_mapping css_string_mappings[] = { CSS_STRING_MAP("Hz", FREQUENCY, DIMENSION), diff --git a/src/dom/node.c b/src/dom/node.c index d0ac956ae..c21cbab9c 100644 --- a/src/dom/node.c +++ b/src/dom/node.c @@ -451,11 +451,11 @@ done_dom_node(struct dom_node *node) struct dom_string * get_dom_node_name(struct dom_node *node) { - static struct dom_string cdata_section_str = INIT_DOM_STRING("#cdata-section", -1); - static struct dom_string comment_str = INIT_DOM_STRING("#comment", -1); - static struct dom_string document_str = INIT_DOM_STRING("#document", -1); - static struct dom_string document_fragment_str = INIT_DOM_STRING("#document-fragment", -1); - static struct dom_string text_str = INIT_DOM_STRING("#text", -1); + static struct dom_string cdata_section_str = STATIC_DOM_STRING("#cdata-section"); + static struct dom_string comment_str = STATIC_DOM_STRING("#comment"); + static struct dom_string document_str = STATIC_DOM_STRING("#document"); + static struct dom_string 
document_fragment_str = STATIC_DOM_STRING("#document-fragment"); + static struct dom_string text_str = STATIC_DOM_STRING("#text"); assert(node); @@ -521,18 +521,18 @@ get_dom_node_type_name(enum dom_node_type type) { static struct dom_string dom_node_type_names[DOM_NODES] = { INIT_DOM_STRING(NULL, 0), - /* DOM_NODE_ELEMENT */ INIT_DOM_STRING("element", -1), - /* DOM_NODE_ATTRIBUTE */ INIT_DOM_STRING("attribute", -1), - /* DOM_NODE_TEXT */ INIT_DOM_STRING("text", -1), - /* DOM_NODE_CDATA_SECTION */ INIT_DOM_STRING("cdata-section", -1), - /* DOM_NODE_ENTITY_REFERENCE */ INIT_DOM_STRING("entity-reference", -1), - /* DOM_NODE_ENTITY */ INIT_DOM_STRING("entity", -1), - /* DOM_NODE_PROCESSING_INSTRUCTION */ INIT_DOM_STRING("proc-instruction", -1), - /* DOM_NODE_COMMENT */ INIT_DOM_STRING("comment", -1), - /* DOM_NODE_DOCUMENT */ INIT_DOM_STRING("document", -1), - /* DOM_NODE_DOCUMENT_TYPE */ INIT_DOM_STRING("document-type", -1), - /* DOM_NODE_DOCUMENT_FRAGMENT */ INIT_DOM_STRING("document-fragment", -1), - /* DOM_NODE_NOTATION */ INIT_DOM_STRING("notation", -1), + /* DOM_NODE_ELEMENT */ STATIC_DOM_STRING("element"), + /* DOM_NODE_ATTRIBUTE */ STATIC_DOM_STRING("attribute"), + /* DOM_NODE_TEXT */ STATIC_DOM_STRING("text"), + /* DOM_NODE_CDATA_SECTION */ STATIC_DOM_STRING("cdata-section"), + /* DOM_NODE_ENTITY_REFERENCE */ STATIC_DOM_STRING("entity-reference"), + /* DOM_NODE_ENTITY */ STATIC_DOM_STRING("entity"), + /* DOM_NODE_PROCESSING_INSTRUCTION */ STATIC_DOM_STRING("proc-instruction"), + /* DOM_NODE_COMMENT */ STATIC_DOM_STRING("comment"), + /* DOM_NODE_DOCUMENT */ STATIC_DOM_STRING("document"), + /* DOM_NODE_DOCUMENT_TYPE */ STATIC_DOM_STRING("document-type"), + /* DOM_NODE_DOCUMENT_FRAGMENT */ STATIC_DOM_STRING("document-fragment"), + /* DOM_NODE_NOTATION */ STATIC_DOM_STRING("notation"), }; assert(type < DOM_NODES); diff --git a/src/dom/scanner.h b/src/dom/scanner.h index 54c3096d7..2a7e61b9b 100644 --- a/src/dom/scanner.h +++ b/src/dom/scanner.h @@ -67,7 +67,7 @@ 
struct dom_scanner_string_mapping { }; #define DOM_STRING_MAP(str, type, family) \ - { INIT_DOM_STRING(str, -1), (type), (family) } + { STATIC_DOM_STRING(str), (type), (family) } #define DOM_STRING_MAP_END \ { INIT_DOM_STRING(NULL, 0), 0, 0 } diff --git a/src/dom/select.c b/src/dom/select.c index e1e4d548d..af60c4d5e 100644 --- a/src/dom/select.c +++ b/src/dom/select.c @@ -28,7 +28,7 @@ get_dom_select_pseudo(struct dom_scanner_token *token) } pseudo_info[] = { #define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \ - { INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type } + { STATIC_DOM_STRING(str), DOM_SELECT_PSEUDO_##type } INIT_DOM_SELECT_PSEUDO_STRING("first-line", FIRST_LINE), INIT_DOM_SELECT_PSEUDO_STRING("first-letter", FIRST_LETTER), diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c index e2e7c5db5..a544a43f2 100644 --- a/src/dom/sgml/scanner.c +++ b/src/dom/sgml/scanner.c @@ -44,7 +44,7 @@ static struct dom_scan_table_info sgml_scan_table_info[] = { }; #define SGML_STRING_MAP(str, type, family) \ - { INIT_DOM_STRING(str, -1), SGML_TOKEN_##type, SGML_TOKEN_##family } + { STATIC_DOM_STRING(str), SGML_TOKEN_##type, SGML_TOKEN_##family } static struct dom_scanner_string_mapping sgml_string_mappings[] = { SGML_STRING_MAP("--", NOTATION_COMMENT, NOTATION), @@ -458,7 +458,8 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t real_length = 0; } else if (string == scanner->end) { - /* It is incomplete. */ + /* It is incomplete so prevent out of bound access to + * the scanned string. 
*/ } else if (is_sgml_ident(*string)) { token->string.string = string; @@ -467,7 +468,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t real_length = string - token->string.string; skip_sgml_space(scanner, &string); - if (*string == '>') { + if (string < scanner->end && *string == '>') { type = SGML_TOKEN_ELEMENT; string++; @@ -570,7 +571,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t string++; skip_sgml_space(scanner, &string); - if (is_sgml_ident(*string)) { + if (string == scanner->end) { + /* Prevent out of bound access. */ + + } else if (is_sgml_ident(*string)) { token->string.string = string; scan_sgml(scanner, string, SGML_CHAR_IDENT); real_length = string - token->string.string; @@ -634,7 +638,10 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t * For stricter parsing we should always require attribute * values to be quoted. */ - if (*string == '>') { + if (string == scanner->end) { + /* Prevent out of bound access. */ + + } else if (*string == '>') { string++; real_length = 0; type = SGML_TOKEN_ELEMENT_EMPTY_END; @@ -671,7 +678,8 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t /* Force an incomplete token. 
*/ string = scanner->end; - } else if (is_sgml_attribute(*string)) { + } else if (string < scanner->end + && is_sgml_attribute(*string)) { token->string.string++; scan_sgml_attribute(scanner, string); type = SGML_TOKEN_ATTRIBUTE; @@ -683,7 +691,8 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t type = SGML_TOKEN_IDENT; } - if (is_sgml_attribute(*string)) { + if (string < scanner->end + && is_sgml_attribute(*string)) { scan_sgml_attribute(scanner, string); type = SGML_TOKEN_ATTRIBUTE; if (string[-1] == '/' && string[0] == '>') { diff --git a/src/dom/sgml/sgml.h b/src/dom/sgml/sgml.h index 34e941e61..ffac94158 100644 --- a/src/dom/sgml/sgml.h +++ b/src/dom/sgml/sgml.h @@ -55,10 +55,10 @@ struct sgml_node_info { { INIT_DOM_STRING(NULL, doctype##_##nodetype##S - 1), doctype##_##nodetype##_UNKNOWN } #define SGML_NODE_INFO(doctype, nodetype, name, data) \ - { INIT_DOM_STRING(#name, sizeof(#name) - 1), doctype##_##nodetype##_##name, data } + { STATIC_DOM_STRING(#name), doctype##_##nodetype##_##name, data } #define SGML_NODE_INF2(doctype, nodetype, name, ident, data) \ - { INIT_DOM_STRING(ident, sizeof(ident) - 1), doctype##_##nodetype##_##name, data } + { STATIC_DOM_STRING(ident), doctype##_##nodetype##_##name, data } #define SGML_NODE_INFO_TYPE(doctype, nodetype, name) doctype##_##nodetype##_##name diff --git a/src/dom/string.h b/src/dom/string.h index 15a15cfde..4e04e9d86 100644 --- a/src/dom/string.h +++ b/src/dom/string.h @@ -3,13 +3,22 @@ #include "util/memory.h" +/* For now DOM has its own little string library. Mostly because there is + * some memory overhead associated with util/string's block-based allocation + * scheme which is optimized for building strings and quickly dispose of it. + * Also, at some point we need to switch to use mainly UTF-8 strings for DOM + * and it needs to be possible to adapt the string library to that. 
--jonas */ + struct dom_string { size_t length; unsigned char *string; }; #define INIT_DOM_STRING(strvalue, strlength) \ - { (strlength) == -1 ? sizeof(strvalue) - 1 : (strlength), (strvalue) } + { (strlength), (strvalue) } + +#define STATIC_DOM_STRING(strvalue) \ + { sizeof(strvalue) - 1, (strvalue) } static inline void set_dom_string(struct dom_string *string, unsigned char *value, size_t length) diff --git a/src/dom/test/dom-select.c b/src/dom/test/dom-select.c index 2121b43f5..e4cc6f238 100644 --- a/src/dom/test/dom-select.c +++ b/src/dom/test/dom-select.c @@ -38,9 +38,9 @@ main(int argc, char *argv[]) struct sgml_parser *parser; struct dom_select *select; enum sgml_document_type doctype = SGML_DOCTYPE_HTML; - struct dom_string uri = INIT_DOM_STRING("dom://test", -1); - struct dom_string source = INIT_DOM_STRING("(no source)", -1); - struct dom_string selector = INIT_DOM_STRING("(no select)", -1); + struct dom_string uri = STATIC_DOM_STRING("dom://test"); + struct dom_string source = STATIC_DOM_STRING("(no source)"); + struct dom_string selector = STATIC_DOM_STRING("(no select)"); int i; for (i = 1; i < argc; i++) { diff --git a/src/dom/test/sgml-parser.c b/src/dom/test/sgml-parser.c index 320758a6b..9ac6817a6 100644 --- a/src/dom/test/sgml-parser.c +++ b/src/dom/test/sgml-parser.c @@ -267,8 +267,8 @@ main(int argc, char *argv[]) int normalize = 0; int complete = 1; size_t read_stdin = 0; - struct dom_string uri = INIT_DOM_STRING("dom://test", -1); - struct dom_string source = INIT_DOM_STRING("(no source)", -1); + struct dom_string uri = STATIC_DOM_STRING("dom://test"); + struct dom_string source = STATIC_DOM_STRING("(no source)"); int i; for (i = 1; i < argc; i++) { diff --git a/src/dom/test/test-sgml-parser-lines b/src/dom/test/test-sgml-parser-lines index e021d59ff..6c0c71e11 100755 --- a/src/dom/test/test-sgml-parser-lines +++ b/src/dom/test/test-sgml-parser-lines @@ -55,5 +55,10 @@ test_output_line_numbers \ 7 8' \ 8 + +test_output_line_numbers \ +'Check 
line numbers. (IIII)' \ +$'1\r\f 2\v\n 3\r\n 4\t\f 5' \ +5 test_done diff --git a/src/mime/mime.c b/src/mime/mime.c index bbeca78b3..373729110 100644 --- a/src/mime/mime.c +++ b/src/mime/mime.c @@ -224,12 +224,6 @@ get_cache_header_content_type(struct cache_entry *cached) return NULL; } -unsigned char * -get_default_content_type(void) -{ - return get_default_mime_type(); -} - unsigned char * get_content_type(struct cache_entry *cached) { diff --git a/src/mime/mime.h b/src/mime/mime.h index 8584ee4f1..779ed3b82 100644 --- a/src/mime/mime.h +++ b/src/mime/mime.h @@ -20,9 +20,6 @@ extern struct module mime_module; * scanning the uri for extensions. */ unsigned char *get_content_type(struct cache_entry *cached); -/* Default mime type */ -unsigned char *get_default_content_type(void); - /* Guess content type by looking at configurations of the given @extension */ unsigned char *get_extension_content_type(unsigned char *extension); diff --git a/src/protocol/fsp/fsp.c b/src/protocol/fsp/fsp.c index d3613cd8a..2ced2e6e7 100644 --- a/src/protocol/fsp/fsp.c +++ b/src/protocol/fsp/fsp.c @@ -27,7 +27,6 @@ #include "intl/gettext/libintl.h" #include "main/module.h" #include "main/select.h" -#include "mime/mime.h" #include "network/connection.h" #include "network/socket.h" #include "osdep/osdep.h" @@ -181,31 +180,6 @@ end: exit(0); } -static unsigned char * -get_content_type_uri(struct uri *uri) -{ - unsigned char *extension = get_extension_from_uri(uri); - - if (extension) { - unsigned char *ctype; - /* XXX: A little hack for making extension handling case - * insensitive. We could probably do it better by making - * guess_encoding() case independent the real problem however - * is with default (via option system) and mimetypes resolving - * doing that option and hash lookup will not be easy to - * convert. 
--jonas */ - convert_to_lowercase(extension, strlen(extension)); - - ctype = get_extension_content_type(extension); - if (ctype && *ctype) { - return ctype; - } - } - - return get_default_content_type(); - -} - #define READ_SIZE 4096 static void @@ -230,13 +204,18 @@ do_fsp(struct connection *conn) FSP_FILE *file = fsp_fopen(ses, data, "r"); int r; - fprintf(stderr, "%s", get_content_type_uri(uri)); - fclose(stderr); if (!file) fsp_error("fsp_fopen error."); + /* Use the default way to find the MIME type, so write an + * 'empty' name, since something needs to be written in order + * to avoid socket errors. */ + fprintf(stderr, "%c", '\0'); + fclose(stderr); + while ((r = fsp_fread(buf, 1, READ_SIZE, file)) > 0) fwrite(buf, 1, r, stdout); + fsp_fclose(file); fsp_close_session(ses); exit(0); @@ -274,7 +253,6 @@ fsp_got_data(struct socket *socket, struct read_buffer *rb) static void fsp_got_header(struct socket *socket, struct read_buffer *rb) { - int len = rb->length; struct connection *conn = socket->conn; struct read_buffer *buf; @@ -286,10 +264,16 @@ fsp_got_header(struct socket *socket, struct read_buffer *rb) return; } socket->state = SOCKET_END_ONCLOSE; - if (len <= 0) goto end; - rb->data[len] = '\0'; - mem_free_set(&conn->cached->content_type, stracpy(rb->data)); -end: + + if (rb->length > 0) { + unsigned char *ctype = memacpy(rb->data, rb->length); + + if (ctype && *ctype) + mem_free_set(&conn->cached->content_type, ctype); + else + mem_free_if(ctype); + } + buf = alloc_read_buffer(conn->data_socket); if (!buf) { close(socket->fd);