1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-02-02 15:09:23 -05:00

Make the SGML scanner (optionally) keep track of line numbers

A new line is either \n or \f. The main logic for counting lines is in
skip_sgml{,_chars,_space}. In the general case, where line numbers are not
wanted, the code tries to avoid the extra checks for newlines.

This will be useful for reporting errors when loading the XBEL file.
This commit is contained in:
Jonas Fonseca 2005-12-31 02:46:56 +01:00 committed by Jonas Fonseca
parent b23beed031
commit a578ed4667
5 changed files with 43 additions and 9 deletions

View File

@ -154,7 +154,7 @@ init_dom_scanner_info(struct dom_scanner_info *scanner_info)
void
init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
struct dom_string *string, int state)
struct dom_string *string, int state, int count_lines)
{
if (!scanner_info->initialized) {
init_dom_scanner_info(scanner_info);
@ -169,5 +169,7 @@ init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_i
scanner->current = scanner->table;
scanner->info = scanner_info;
scanner->state = state;
scanner->count_lines = !!count_lines;
scanner->lineno = scanner->count_lines;
scanner->info->scan(scanner);
}

View File

@ -92,7 +92,7 @@ struct dom_scanner_info {
/* Initializes the scanner. */
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
struct dom_string *string, int state);
struct dom_string *string, int state, int count_lines);
/* The number of tokens in the scanners token table:
* At best it should be big enough to contain properties with space separated
@ -123,6 +123,9 @@ struct dom_scanner {
int line;
#endif
unsigned int count_lines:1;
unsigned int lineno;
/* Some state indicator only meaningful to the scanner internals */
int state;

View File

@ -391,7 +391,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
struct dom_scanner scanner;
struct dom_select_node sel;
init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0);
init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0);
memset(&sel, 0, sizeof(sel));

View File

@ -296,7 +296,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
struct dom_scanner attr_scanner;
init_dom_scanner(&attr_scanner, &sgml_scanner_info,
&token->string, SGML_STATE_ELEMENT);
&token->string, SGML_STATE_ELEMENT,
scanner->count_lines);
if (dom_scanner_has_tokens(&attr_scanner))
parse_sgml_attributes(stack, &attr_scanner);
@ -361,7 +362,7 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
parsing->depth = parser->stack.depth;
get_dom_stack_top(&parser->stack)->immutable = 1;
init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
SGML_STATE_TEXT);
SGML_STATE_TEXT, 0);
}
static void

View File

@ -36,7 +36,7 @@ static struct dom_scan_table_info sgml_scan_table_info[] = {
DOM_SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
DOM_SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
DOM_SCAN_TABLE_STRING("\f\n", SGML_CHAR_NEWLINE),
DOM_SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
DOM_SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
@ -74,6 +74,7 @@ struct dom_scanner_info sgml_scanner_info = {
#define is_sgml_ident(c) check_sgml_table(c, SGML_CHAR_IDENT)
#define is_sgml_entity(c) check_sgml_table(c, SGML_CHAR_ENTITY)
#define is_sgml_space(c) check_sgml_table(c, SGML_CHAR_WHITESPACE)
#define is_sgml_newline(c) check_sgml_table(c, SGML_CHAR_NEWLINE)
#define is_sgml_text(c) !check_sgml_table(c, SGML_CHAR_NOT_TEXT)
#define is_sgml_token_start(c) check_sgml_table(c, SGML_CHAR_TOKEN_START)
#define is_sgml_attribute(c) !check_sgml_table(c, SGML_CHAR_NOT_ATTRIBUTE | SGML_CHAR_WHITESPACE)
@ -83,7 +84,16 @@ skip_sgml_space(struct dom_scanner *scanner, unsigned char **string)
{
unsigned char *pos = *string;
scan_sgml(scanner, pos, SGML_CHAR_WHITESPACE);
if (!scanner->count_lines) {
scan_sgml(scanner, pos, SGML_CHAR_WHITESPACE);
} else {
while (pos < scanner->end && is_sgml_space(*pos)) {
if (is_sgml_newline(*pos))
scanner->lineno++;
pos++;
}
}
*string = pos;
}
@ -157,11 +167,26 @@ static inline unsigned char *
skip_sgml_chars(struct dom_scanner *scanner, unsigned char *string,
unsigned char skipto)
{
int newlines;
assert(string >= scanner->position && string <= scanner->end);
for (; string < scanner->end; string++) {
if (*string == skipto)
if (!scanner->count_lines) {
size_t length = scanner->end - string;
return memchr(string, skipto, length);
}
for (newlines = 0; string < scanner->end; string++) {
if (is_sgml_newline(*string))
newlines++;
if (*string == skipto) {
/* Only count newlines if we actually find the
* requested char. Else callers are assumed to discard
* the scanning. */
scanner->lineno += newlines;
return string;
}
}
return NULL;
@ -189,6 +214,9 @@ skip_sgml(struct dom_scanner *scanner, unsigned char **string, unsigned char ski
end = skip_sgml_chars(scanner, pos + 1, *pos);
if (end) pos = end;
} else if (scanner->count_lines && is_sgml_newline(*pos)) {
scanner->lineno++;
}
}