diff --git a/src/dom/scanner.c b/src/dom/scanner.c index aea8e2d1..c3cd1baa 100644 --- a/src/dom/scanner.c +++ b/src/dom/scanner.c @@ -155,7 +155,7 @@ init_dom_scanner_info(struct dom_scanner_info *scanner_info) void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info, struct dom_string *string, int state, int count_lines, int complete, - int check_complete) + int check_complete, int detect_errors) { if (!scanner_info->initialized) { init_dom_scanner_info(scanner_info); @@ -173,6 +173,7 @@ init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_i scanner->count_lines = !!count_lines; scanner->incomplete = !complete; scanner->check_complete = !!check_complete; + scanner->detect_errors = !!detect_errors; scanner->lineno = scanner->count_lines; scanner->info->scan(scanner); } diff --git a/src/dom/scanner.h b/src/dom/scanner.h index cb9906d4..eabe9574 100644 --- a/src/dom/scanner.h +++ b/src/dom/scanner.h @@ -20,6 +20,9 @@ struct dom_scanner_token { /* Some precedence value */ int precedence; + /* The line number; used for error tokens */ + unsigned int lineno; + /* The start of the token string and the token length */ struct dom_string string; }; @@ -93,7 +96,7 @@ struct dom_scanner_info { /* Initializes the scanner. */ void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info, struct dom_string *string, int state, int count_lines, int complete, - int check_complete); + int check_complete, int detect_errors); /* The number of tokens in the scanners token table: * At best it should be big enough to contain properties with space separated @@ -130,6 +133,9 @@ struct dom_scanner { unsigned int check_complete:1; /* Only generate complete tokens */ unsigned int incomplete:1; /* The scanned string is incomplete */ + unsigned int detect_errors:1; /* Check for markup errors */ + unsigned int found_error; /* Did we already report this error? 
*/ + unsigned int count_lines:1; /* Is line counting enbaled? */ unsigned int lineno; /* Line # of the last scanned token */ diff --git a/src/dom/select.c b/src/dom/select.c index 1c53f61c..a0e5d764 100644 --- a/src/dom/select.c +++ b/src/dom/select.c @@ -391,7 +391,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack, struct dom_scanner scanner; struct dom_select_node sel; - init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0); + init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0, 0); memset(&sel, 0, sizeof(sel)); diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c index 0c5b4a3b..81227d09 100644 --- a/src/dom/sgml/parser.c +++ b/src/dom/sgml/parser.c @@ -154,6 +154,17 @@ add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scann /* SGML parser main handling: */ +static enum sgml_parser_code +call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *token) +{ + struct sgml_parser *parser = get_sgml_parser(stack); + unsigned int line = get_sgml_parser_line_number(parser); + + assert(parser->error_func); + + return parser->error_func(parser, &token->string, line); +} + static inline enum sgml_parser_code parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) { @@ -217,6 +228,17 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) case SGML_TOKEN_INCOMPLETE: return SGML_PARSER_CODE_INCOMPLETE; + case SGML_TOKEN_ERROR: + { + enum sgml_parser_code code; + + code = call_sgml_error_function(stack, token); + if (code != SGML_PARSER_CODE_OK) + return code; + + skip_dom_scanner_token(scanner); + break; + } default: skip_dom_scanner_token(scanner); } @@ -326,7 +348,7 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) /* The attribute souce is complete. 
*/ init_dom_scanner(&attr_scanner, &sgml_scanner_info, &token->string, SGML_STATE_ELEMENT, - scanner->count_lines, 1, 0); + scanner->count_lines, 1, 0, 0); if (dom_scanner_has_tokens(&attr_scanner)) { /* Ignore parser codes from this @@ -350,6 +372,17 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) case SGML_TOKEN_INCOMPLETE: return SGML_PARSER_CODE_INCOMPLETE; + case SGML_TOKEN_ERROR: + { + enum sgml_parser_code code; + + code = call_sgml_error_function(stack, token); + if (code != SGML_PARSER_CODE_OK) + return code; + + skip_dom_scanner_token(scanner); + break; + } case SGML_TOKEN_SPACE: case SGML_TOKEN_TEXT: default: @@ -403,11 +436,13 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data) int count_lines = !!(parser->flags & SGML_PARSER_COUNT_LINES); int complete = !!(parser->flags & SGML_PARSER_COMPLETE); int incremental = !!(parser->flags & SGML_PARSER_INCREMENTAL); + int detect_errors = !!(parser->flags & SGML_PARSER_DETECT_ERRORS); parsing->depth = parser->stack.depth; get_dom_stack_top(&parser->stack)->immutable = 1; init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string, - SGML_STATE_TEXT, count_lines, complete, incremental); + SGML_STATE_TEXT, count_lines, complete, incremental, + detect_errors); } static void @@ -494,6 +529,11 @@ get_sgml_parser_line_number(struct sgml_parser *parser) assert(pstate->scanner.count_lines && pstate->scanner.lineno); + if (pstate->scanner.current + && pstate->scanner.current < pstate->scanner.table + DOM_SCANNER_TOKENS + && pstate->scanner.current->type == SGML_TOKEN_ERROR) + return pstate->scanner.current->lineno; + return pstate->scanner.lineno; } @@ -553,6 +593,9 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype, return NULL; } + if (flags & SGML_PARSER_DETECT_ERRORS) + flags |= SGML_PARSER_COUNT_LINES; + parser->type = type; parser->flags = flags; parser->info = get_sgml_info(doctype); diff --git a/src/dom/sgml/parser.h 
b/src/dom/sgml/parser.h index 4764c91b..b81c6a93 100644 --- a/src/dom/sgml/parser.h +++ b/src/dom/sgml/parser.h @@ -7,6 +7,7 @@ #include "dom/sgml/sgml.h" #include "dom/scanner.h" +struct sgml_parser; struct string; struct uri; @@ -26,6 +27,7 @@ enum sgml_parser_flag { SGML_PARSER_COUNT_LINES = 1, SGML_PARSER_COMPLETE = 2, SGML_PARSER_INCREMENTAL = 4, + SGML_PARSER_DETECT_ERRORS = 8, }; struct sgml_parser_state { @@ -48,6 +50,11 @@ enum sgml_parser_code { SGML_PARSER_CODE_ERROR, }; +/* Error handler: gets the parser, the error token string and line number. + * Any return code but SGML_PARSER_CODE_OK ends parsing and is returned. */ +typedef enum sgml_parser_code +(*sgml_error_T)(struct sgml_parser *, struct dom_string *, unsigned int); + struct sgml_parser { enum sgml_parser_type type; /* Stream or tree */ enum sgml_parser_flag flags; /* Flags that control the behaviour */ @@ -57,6 +64,8 @@ struct sgml_parser { struct dom_string uri; /* The URI of the DOM document */ struct dom_node *root; /* The document root node */ + sgml_error_T error_func; /* Called for SGML_TOKEN_ERROR tokens */ + struct dom_stack stack; /* A stack for tracking parsed nodes */ struct dom_stack parsing; /* Used for tracking parsing states */ }; diff --git a/src/dom/sgml/scanner.h b/src/dom/sgml/scanner.h index 3857ad78..fc58b0aa 100644 --- a/src/dom/sgml/scanner.h +++ b/src/dom/sgml/scanner.h @@ -52,6 +52,10 @@ enum sgml_token_type { * not complete. Only meaningful if scanner->complete is incomplete. */ SGML_TOKEN_INCOMPLETE, + /* A special token for reporting that an error in the markup was found. + * Only in effect when error checking has been requested. */ + SGML_TOKEN_ERROR, + /* Token type used internally when scanning to signal that the token * should not be recorded in the scanners token table. */ SGML_TOKEN_SKIP,