mirror of
https://github.com/rkd77/elinks.git
synced 2025-01-03 14:57:44 -05:00
Add basic support for detection errors while scanning
It mostly uses the checking for incompleteness already in place. Tested lightly so it will definately need some more work.
This commit is contained in:
parent
5defc48eb3
commit
c993a0012e
@ -117,6 +117,35 @@ set_sgml_incomplete(struct dom_scanner *scanner, struct dom_scanner_token *token
|
||||
scanner->position = scanner->end;
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
check_sgml_error(struct dom_scanner *scanner)
|
||||
{
|
||||
unsigned int found_error = scanner->found_error;
|
||||
|
||||
/* Toggle if we found an error previously. */
|
||||
scanner->found_error = 0;
|
||||
|
||||
return scanner->detect_errors && !found_error;
|
||||
}
|
||||
|
||||
static struct dom_scanner_token *
|
||||
set_sgml_error(struct dom_scanner *scanner, unsigned char *end)
|
||||
{
|
||||
struct dom_scanner_token *token = scanner->current;
|
||||
|
||||
assert(!scanner->found_error);
|
||||
|
||||
scanner->found_error = 1;
|
||||
|
||||
token->type = SGML_TOKEN_ERROR;
|
||||
token->lineno = scanner->lineno;
|
||||
set_dom_string(&token->string, scanner->position, end - scanner->position);
|
||||
|
||||
return scanner->found_error ? NULL : scanner->current;
|
||||
}
|
||||
|
||||
|
||||
/* Text token scanning */
|
||||
|
||||
/* I think it is faster to not check the table here --jonas */
|
||||
@ -155,9 +184,17 @@ scan_sgml_text_token(struct dom_scanner *scanner, struct dom_scanner_token *toke
|
||||
}
|
||||
|
||||
/* We want the biggest possible text token. */
|
||||
if (check_sgml_incomplete(scanner, string) && !complete) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
if (!complete) {
|
||||
if (check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner)) {
|
||||
token = set_sgml_error(scanner, string);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -564,9 +601,44 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
|
||||
}
|
||||
}
|
||||
|
||||
if (possibly_incomplete && check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
if (possibly_incomplete) {
|
||||
if (check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner)) {
|
||||
unsigned char *end = string;
|
||||
|
||||
DBG("%d %d", type, SGML_TOKEN_NOTATION_DOCTYPE);
|
||||
switch (type) {
|
||||
case SGML_TOKEN_CDATA_SECTION:
|
||||
case SGML_TOKEN_NOTATION_ATTLIST:
|
||||
case SGML_TOKEN_NOTATION_DOCTYPE:
|
||||
case SGML_TOKEN_NOTATION_ELEMENT:
|
||||
if (scanner->position + 9 < end)
|
||||
end = scanner->position + 9;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_COMMENT:
|
||||
/* Just include the '<!--' part. */
|
||||
if (scanner->position + 4 < end)
|
||||
end = scanner->position + 4;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_ENTITY:
|
||||
if (scanner->position + 6 < end)
|
||||
end = scanner->position + 6;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
token = set_sgml_error(scanner, end);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
token->type = type;
|
||||
@ -607,6 +679,12 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner)) {
|
||||
token = set_sgml_error(scanner, scanner->position + 2);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
token->type = SGML_TOKEN_PROCESS_DATA;
|
||||
|
Loading…
Reference in New Issue
Block a user