DOM scanner: optional line counting

Summary of what this patch does (derived from the hunks below):

  * init_dom_scanner() takes a new trailing `count_lines' argument. It is
    normalised with `!!' into the 1-bit scanner->count_lines flag, and
    scanner->lineno is initialised to that same value (1 when counting,
    0 otherwise) before the first scan.
  * Callers: parse_dom_select() and sgml_parsing_push() pass 0; the
    attribute scanner created in parse_sgml_plain() inherits
    scanner->count_lines from its parent scanner.
  * SGML scanner: '\r' is removed from the SGML_CHAR_NEWLINE set, leaving
    "\f\n" (presumably so CRLF is counted once -- confirm). A new
    is_sgml_newline() test is added, and skip_sgml_space(),
    skip_sgml_chars() and skip_sgml() increment scanner->lineno when
    count_lines is set.
  * skip_sgml_chars() uses memchr() when not counting; when counting it
    adds the newlines it saw to lineno only if the requested character
    is found.

NOTE(review): this file had been collapsed onto three physical lines, so
its record structure was rebuilt from the hunk line counts. Leading
indentation and continuation alignment are assumed (tabs), not recovered.
If context fails to match, apply with whitespace-insensitive matching
(`git apply --ignore-whitespace' or `patch -l') and confirm the result.

diff --git a/src/dom/scanner.c b/src/dom/scanner.c
index b9e9d9798..e682a7d15 100644
--- a/src/dom/scanner.c
+++ b/src/dom/scanner.c
@@ -154,7 +154,7 @@ init_dom_scanner_info(struct dom_scanner_info *scanner_info)
 
 void
 init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
-		 struct dom_string *string, int state)
+		 struct dom_string *string, int state, int count_lines)
 {
 	if (!scanner_info->initialized) {
 		init_dom_scanner_info(scanner_info);
@@ -169,5 +169,7 @@ init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_i
 	scanner->current = scanner->table;
 	scanner->info = scanner_info;
 	scanner->state = state;
+	scanner->count_lines = !!count_lines;
+	scanner->lineno = scanner->count_lines;
 	scanner->info->scan(scanner);
 }
diff --git a/src/dom/scanner.h b/src/dom/scanner.h
index a0c58ee00..71978f351 100644
--- a/src/dom/scanner.h
+++ b/src/dom/scanner.h
@@ -92,7 +92,7 @@ struct dom_scanner_info {
 
 /* Initializes the scanner. */
 void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
-		      struct dom_string *string, int state);
+		      struct dom_string *string, int state, int count_lines);
 
 /* The number of tokens in the scanners token table:
  * At best it should be big enough to contain properties with space separated
@@ -123,6 +123,9 @@ struct dom_scanner {
 	int line;
 #endif
 
+	unsigned int count_lines:1;
+	unsigned int lineno;
+
 	/* Some state indicator only meaningful to the scanner internals */
 	int state;
 
diff --git a/src/dom/select.c b/src/dom/select.c
index b3fe4cb20..33f9e4c91 100644
--- a/src/dom/select.c
+++ b/src/dom/select.c
@@ -391,7 +391,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
 	struct dom_scanner scanner;
 	struct dom_select_node sel;
 
-	init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0);
+	init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0);
 
 	memset(&sel, 0, sizeof(sel));
 
diff --git a/src/dom/sgml/parser.c b/src/dom/sgml/parser.c
index 72ffdbe08..2602028e2 100644
--- a/src/dom/sgml/parser.c
+++ b/src/dom/sgml/parser.c
@@ -296,7 +296,8 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
 			struct dom_scanner attr_scanner;
 
 			init_dom_scanner(&attr_scanner, &sgml_scanner_info,
-					 &token->string, SGML_STATE_ELEMENT);
+					 &token->string, SGML_STATE_ELEMENT,
+					 scanner->count_lines);
 
 			if (dom_scanner_has_tokens(&attr_scanner))
 				parse_sgml_attributes(stack, &attr_scanner);
@@ -361,7 +362,7 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
 	parsing->depth = parser->stack.depth;
 	get_dom_stack_top(&parser->stack)->immutable = 1;
 	init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
-			 SGML_STATE_TEXT);
+			 SGML_STATE_TEXT, 0);
 }
 
 static void
diff --git a/src/dom/sgml/scanner.c b/src/dom/sgml/scanner.c
index 306d6739e..8a51aab86 100644
--- a/src/dom/sgml/scanner.c
+++ b/src/dom/sgml/scanner.c
@@ -36,7 +36,7 @@ static struct dom_scan_table_info sgml_scan_table_info[] = {
 	DOM_SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
 	DOM_SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
 	DOM_SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
-	DOM_SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
+	DOM_SCAN_TABLE_STRING("\f\n", SGML_CHAR_NEWLINE),
 	DOM_SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
 	DOM_SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
 
@@ -74,6 +74,7 @@ struct dom_scanner_info sgml_scanner_info = {
 #define is_sgml_ident(c) check_sgml_table(c, SGML_CHAR_IDENT)
 #define is_sgml_entity(c) check_sgml_table(c, SGML_CHAR_ENTITY)
 #define is_sgml_space(c) check_sgml_table(c, SGML_CHAR_WHITESPACE)
+#define is_sgml_newline(c) check_sgml_table(c, SGML_CHAR_NEWLINE)
 #define is_sgml_text(c) !check_sgml_table(c, SGML_CHAR_NOT_TEXT)
 #define is_sgml_token_start(c) check_sgml_table(c, SGML_CHAR_TOKEN_START)
 #define is_sgml_attribute(c) !check_sgml_table(c, SGML_CHAR_NOT_ATTRIBUTE | SGML_CHAR_WHITESPACE)
@@ -83,7 +84,16 @@ skip_sgml_space(struct dom_scanner *scanner, unsigned char **string)
 {
 	unsigned char *pos = *string;
 
-	scan_sgml(scanner, pos, SGML_CHAR_WHITESPACE);
+	if (!scanner->count_lines) {
+		scan_sgml(scanner, pos, SGML_CHAR_WHITESPACE);
+	} else {
+		while (pos < scanner->end && is_sgml_space(*pos)) {
+			if (is_sgml_newline(*pos))
+				scanner->lineno++;
+			pos++;
+		}
+	}
+
 
 	*string = pos;
 }
@@ -157,11 +167,26 @@ static inline unsigned char *
 skip_sgml_chars(struct dom_scanner *scanner, unsigned char *string,
 		unsigned char skipto)
 {
+	int newlines;
+
 	assert(string >= scanner->position && string <= scanner->end);
 
-	for (; string < scanner->end; string++) {
-		if (*string == skipto)
+	if (!scanner->count_lines) {
+		size_t length = scanner->end - string;
+
+		return memchr(string, skipto, length);
+	}
+
+	for (newlines = 0; string < scanner->end; string++) {
+		if (is_sgml_newline(*string))
+			newlines++;
+		if (*string == skipto) {
+			/* Only count newlines if we actually find the
+			 * requested char. Else callers are assumed to discard
+			 * the scanning. */
+			scanner->lineno += newlines;
 			return string;
+		}
 	}
 
 	return NULL;
@@ -189,6 +214,9 @@ skip_sgml(struct dom_scanner *scanner, unsigned char **string, unsigned char ski
 
 			end = skip_sgml_chars(scanner, pos + 1, *pos);
 			if (end) pos = end;
+
+		} else if (scanner->count_lines && is_sgml_newline(*pos)) {
+			scanner->lineno++;
 		}
 	}
 