mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
Elute all DOM-related code and put it in src/dom
This commit is contained in:
parent
217f905d88
commit
71533eef9a
@ -7,6 +7,7 @@ SUBDIRS-$(CONFIG_FORMHIST) += formhist
|
||||
SUBDIRS-$(CONFIG_GLOBHIST) += globhist
|
||||
SUBDIRS-$(CONFIG_ECMASCRIPT) += ecmascript
|
||||
SUBDIRS-$(CONFIG_SCRIPTING) += scripting
|
||||
SUBDIRS-$(CONFIG_DOM) += dom
|
||||
|
||||
SUBDIRS = \
|
||||
bfu \
|
||||
|
@ -2,7 +2,7 @@ top_builddir=../..
|
||||
include $(top_builddir)/Makefile.config
|
||||
|
||||
SUBDIRS-$(CONFIG_CSS) += css
|
||||
SUBDIRS-$(CONFIG_DOM) += dom sgml
|
||||
SUBDIRS-$(CONFIG_DOM) += dom
|
||||
|
||||
SUBDIRS = html plain
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
top_builddir=../../..
|
||||
include $(top_builddir)/Makefile.config
|
||||
|
||||
OBJS = node.o renderer.o select.o stack.o
|
||||
OBJS = renderer.o
|
||||
|
||||
include $(top_srcdir)/Makefile.lib
|
||||
|
@ -20,11 +20,12 @@
|
||||
#include "document/css/stylesheet.h"
|
||||
#include "document/docdata.h"
|
||||
#include "document/document.h"
|
||||
#include "document/dom/node.h"
|
||||
#include "document/dom/renderer.h"
|
||||
#include "document/dom/stack.h"
|
||||
#include "document/renderer.h"
|
||||
#include "document/sgml/parser.h"
|
||||
#include "dom/scanner.h"
|
||||
#include "dom/sgml/parser.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/stack.h"
|
||||
#include "intl/charsets.h"
|
||||
#include "globhist/globhist.h" /* get_global_history_item() */
|
||||
#include "protocol/uri.h"
|
||||
@ -32,7 +33,6 @@
|
||||
#include "util/box.h"
|
||||
#include "util/error.h"
|
||||
#include "util/memory.h"
|
||||
#include "util/scanner.h"
|
||||
#include "util/snprintf.h"
|
||||
#include "util/string.h"
|
||||
|
||||
@ -506,7 +506,7 @@ render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, vo
|
||||
struct dom_renderer *renderer = stack->current->data;
|
||||
struct dom_stack_state *state = get_dom_stack_top(stack);
|
||||
struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
|
||||
struct scanner_token *token = &pstate->end_token;
|
||||
struct dom_scanner_token *token = &pstate->end_token;
|
||||
unsigned char *string = token->string;
|
||||
int length = token->length;
|
||||
|
||||
|
7
src/dom/Makefile
Normal file
7
src/dom/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
top_builddir=../..
|
||||
include $(top_builddir)/Makefile.config
|
||||
|
||||
SUBDIRS = css sgml
|
||||
OBJS = node.o select.o stack.o scanner.o
|
||||
|
||||
include $(top_srcdir)/Makefile.lib
|
6
src/dom/css/Makefile
Normal file
6
src/dom/css/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
top_builddir=../../..
|
||||
include $(top_builddir)/Makefile.config
|
||||
|
||||
OBJS = scanner.o
|
||||
|
||||
include $(top_srcdir)/Makefile.lib
|
385
src/dom/css/scanner.c
Normal file
385
src/dom/css/scanner.c
Normal file
@ -0,0 +1,385 @@
|
||||
/* CSS token scanner utilities */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "dom/css/scanner.h"
|
||||
#include "dom/scanner.h"
|
||||
#include "util/error.h"
|
||||
#include "util/string.h"
|
||||
|
||||
|
||||
/* Bitmap entries for the CSS character groups used in the scanner table */
|
||||
|
||||
/* One bit per character class. A scan table slot holds the OR of every class
 * the corresponding character belongs to. */
enum css_char_group {
	CSS_CHAR_ALPHA		= 0x001,
	CSS_CHAR_DIGIT		= 0x002,
	CSS_CHAR_HEX_DIGIT	= 0x004,
	CSS_CHAR_IDENT		= 0x008,
	CSS_CHAR_IDENT_START	= 0x010,
	CSS_CHAR_NEWLINE	= 0x020,
	CSS_CHAR_NON_ASCII	= 0x040,
	CSS_CHAR_SGML_MARKUP	= 0x080,
	CSS_CHAR_TOKEN		= 0x100,
	CSS_CHAR_TOKEN_START	= 0x200,
	CSS_CHAR_WHITESPACE	= 0x400,
};
|
||||
|
||||
static const struct dom_scan_table_info css_scan_table_info[] = {
|
||||
DOM_SCAN_TABLE_RANGE("0", '9', CSS_CHAR_DIGIT | CSS_CHAR_HEX_DIGIT | CSS_CHAR_IDENT),
|
||||
DOM_SCAN_TABLE_RANGE("A", 'F', CSS_CHAR_HEX_DIGIT),
|
||||
DOM_SCAN_TABLE_RANGE("A", 'Z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
|
||||
DOM_SCAN_TABLE_RANGE("a", 'f', CSS_CHAR_HEX_DIGIT),
|
||||
DOM_SCAN_TABLE_RANGE("a", 'z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
|
||||
/* For the octal number impared (me including) \241 is 161 --jonas */
|
||||
DOM_SCAN_TABLE_RANGE("\241", 255, CSS_CHAR_NON_ASCII | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
|
||||
|
||||
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v\000", CSS_CHAR_WHITESPACE),
|
||||
DOM_SCAN_TABLE_STRING("\f\n\r", CSS_CHAR_NEWLINE),
|
||||
DOM_SCAN_TABLE_STRING("-", CSS_CHAR_IDENT),
|
||||
DOM_SCAN_TABLE_STRING(".#@!\"'<-/|^$*", CSS_CHAR_TOKEN_START),
|
||||
/* Unicode escape (that we do not handle yet) + other special chars */
|
||||
DOM_SCAN_TABLE_STRING("\\_", CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
|
||||
/* This should contain mostly used char tokens like ':' and maybe a few
|
||||
* garbage chars that people might put in their CSS code */
|
||||
DOM_SCAN_TABLE_STRING("[({})];:,.>+~", CSS_CHAR_TOKEN),
|
||||
DOM_SCAN_TABLE_STRING("<![CDATA]->", CSS_CHAR_SGML_MARKUP),
|
||||
|
||||
DOM_SCAN_TABLE_END,
|
||||
};
|
||||
|
||||
static const struct dom_scanner_string_mapping css_string_mappings[] = {
|
||||
{ "Hz", CSS_TOKEN_FREQUENCY, CSS_TOKEN_DIMENSION },
|
||||
{ "cm", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "deg", CSS_TOKEN_ANGLE, CSS_TOKEN_DIMENSION },
|
||||
{ "em", CSS_TOKEN_EM, CSS_TOKEN_DIMENSION },
|
||||
{ "ex", CSS_TOKEN_EX, CSS_TOKEN_DIMENSION },
|
||||
{ "grad", CSS_TOKEN_ANGLE, CSS_TOKEN_DIMENSION },
|
||||
{ "in", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "kHz", CSS_TOKEN_FREQUENCY, CSS_TOKEN_DIMENSION },
|
||||
{ "mm", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "ms", CSS_TOKEN_TIME, CSS_TOKEN_DIMENSION },
|
||||
{ "pc", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "pt", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "px", CSS_TOKEN_LENGTH, CSS_TOKEN_DIMENSION },
|
||||
{ "rad", CSS_TOKEN_ANGLE, CSS_TOKEN_DIMENSION },
|
||||
{ "s", CSS_TOKEN_TIME, CSS_TOKEN_DIMENSION },
|
||||
|
||||
{ "rgb", CSS_TOKEN_RGB, CSS_TOKEN_FUNCTION },
|
||||
{ "url", CSS_TOKEN_URL, CSS_TOKEN_FUNCTION },
|
||||
|
||||
{ "charset", CSS_TOKEN_AT_CHARSET, CSS_TOKEN_AT_KEYWORD },
|
||||
{ "font-face", CSS_TOKEN_AT_FONT_FACE, CSS_TOKEN_AT_KEYWORD },
|
||||
{ "import", CSS_TOKEN_AT_IMPORT, CSS_TOKEN_AT_KEYWORD },
|
||||
{ "media", CSS_TOKEN_AT_MEDIA, CSS_TOKEN_AT_KEYWORD },
|
||||
{ "page", CSS_TOKEN_AT_PAGE, CSS_TOKEN_AT_KEYWORD },
|
||||
|
||||
{ NULL, CSS_TOKEN_NONE, CSS_TOKEN_NONE },
|
||||
};
|
||||
|
||||
static struct dom_scanner_token *scan_css_tokens(struct dom_scanner *scanner);
|
||||
|
||||
struct dom_scanner_info dom_css_scanner_info = {
|
||||
css_string_mappings,
|
||||
css_scan_table_info,
|
||||
scan_css_tokens,
|
||||
};
|
||||
|
||||
/* Non-zero when character @c carries the class bit(s) @bit in the CSS scan
 * table. */
#define check_css_table(c, bit)	(dom_css_scanner_info.scan_table[(c)] & (bit))

/* Advance @s while it is before the end of the scanned string and points at a
 * character of class @bit. Wrapped in do/while so the macro behaves as one
 * statement, also inside an unbraced if/else. */
#define scan_css(scanner, s, bit)						\
	do {									\
		while ((s) < (scanner)->end && check_css_table(*(s), bit))	\
			(s)++;							\
	} while (0)

/* Move @s backwards while it is not before the start of the scanned string
 * and points at a character of class @bit. */
#define scan_back_css(scanner, s, bit)						\
	do {									\
		while ((s) >= (scanner)->string && check_css_table(*(s), bit))	\
			(s)--;							\
	} while (0)

/* Character class predicates */
#define is_css_ident_start(c)	check_css_table(c, CSS_CHAR_IDENT_START)
#define is_css_ident(c)		check_css_table(c, CSS_CHAR_IDENT)
#define is_css_digit(c)		check_css_table(c, CSS_CHAR_DIGIT)
#define is_css_hexdigit(c)	check_css_table(c, CSS_CHAR_HEX_DIGIT)
#define is_css_char_token(c)	check_css_table(c, CSS_CHAR_TOKEN)
#define is_css_token_start(c)	check_css_table(c, CSS_CHAR_TOKEN_START)
|
||||
|
||||
|
||||
/* Advance @s until it reaches the end of the scanned string, the @skipto
 * character, or a character whose precedence forbids skipping past it.
 * Quoted runs are jumped over as a unit when the matching closing quote is
 * found. The quote search covers only the bytes after the opening quote and
 * before (scanner)->end, so nothing at or past the end is inspected. */
#define skip_css(scanner, s, skipto)						\
	while (s < (scanner)->end						\
	       && *(s) != (skipto)						\
	       && check_css_precedence(*(s), skipto)) {				\
		if (isquote(*(s))) {						\
			int size = (scanner)->end - (s) - 1;			\
			unsigned char *end = memchr(s + 1, *(s), size);	\
										\
			if (end) (s) = end;					\
		}								\
		(s)++;								\
	}
|
||||
|
||||
|
||||
static inline void
|
||||
scan_css_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
|
||||
{
|
||||
unsigned char *string = scanner->position;
|
||||
unsigned char first_char = *string;
|
||||
enum css_token_type type = CSS_TOKEN_GARBAGE;
|
||||
int real_length = -1;
|
||||
|
||||
assert(first_char);
|
||||
token->string = string++;
|
||||
|
||||
if (is_css_char_token(first_char)) {
|
||||
type = first_char;
|
||||
|
||||
} else if (is_css_digit(first_char) || first_char == '.') {
|
||||
scan_css(scanner, string, CSS_CHAR_DIGIT);
|
||||
|
||||
/* First scan the full number token */
|
||||
if (*string == '.') {
|
||||
string++;
|
||||
|
||||
if (is_css_digit(*string)) {
|
||||
type = CSS_TOKEN_NUMBER;
|
||||
scan_css(scanner, string, CSS_CHAR_DIGIT);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check what kind of number we have */
|
||||
if (*string == '%') {
|
||||
if (first_char != '.')
|
||||
type = CSS_TOKEN_PERCENTAGE;
|
||||
string++;
|
||||
|
||||
} else if (!is_css_ident_start(*string)) {
|
||||
type = CSS_TOKEN_NUMBER;
|
||||
|
||||
} else {
|
||||
unsigned char *ident = string;
|
||||
|
||||
scan_css(scanner, string, CSS_CHAR_IDENT);
|
||||
type = map_dom_scanner_string(scanner, ident, string,
|
||||
CSS_TOKEN_DIMENSION);
|
||||
}
|
||||
|
||||
} else if (is_css_ident_start(first_char)) {
|
||||
scan_css(scanner, string, CSS_CHAR_IDENT);
|
||||
|
||||
if (*string == '(') {
|
||||
unsigned char *function_end = string + 1;
|
||||
|
||||
/* Make sure that we have an ending ')' */
|
||||
skip_css(scanner, function_end, ')');
|
||||
if (*function_end == ')') {
|
||||
type = map_dom_scanner_string(scanner, token->string,
|
||||
string, CSS_TOKEN_FUNCTION);
|
||||
|
||||
/* If it is not a known function just skip the
|
||||
* how arg stuff so we don't end up generating
|
||||
* a lot of useless tokens. */
|
||||
if (type == CSS_TOKEN_FUNCTION) {
|
||||
string = function_end;
|
||||
|
||||
} else if (type == CSS_TOKEN_URL) {
|
||||
/* Extracting the URL first removes any
|
||||
* leading or ending whitespace and
|
||||
* then see if the url is given in a
|
||||
* string. If that is the case the
|
||||
* string delimiters are also trimmed.
|
||||
* This is not totally correct because
|
||||
* we should of course handle escape
|
||||
* sequences .. but that will have to
|
||||
* be fixed later. */
|
||||
unsigned char *from = string + 1;
|
||||
unsigned char *to = function_end - 1;
|
||||
|
||||
scan_css(scanner, from, CSS_CHAR_WHITESPACE);
|
||||
scan_back_css(scanner, to, CSS_CHAR_WHITESPACE);
|
||||
|
||||
if (isquote(*from)) from++;
|
||||
if (isquote(*to)) to--;
|
||||
|
||||
token->string = from;
|
||||
real_length = to - from + 1;
|
||||
assert(real_length >= 0);
|
||||
string = function_end;
|
||||
}
|
||||
|
||||
assert(type != CSS_TOKEN_RGB || *string == '(');
|
||||
assert(type != CSS_TOKEN_URL || *string == ')');
|
||||
assert(type != CSS_TOKEN_FUNCTION || *string == ')');
|
||||
}
|
||||
|
||||
string++;
|
||||
|
||||
} else {
|
||||
type = CSS_TOKEN_IDENT;
|
||||
}
|
||||
|
||||
} else if (!is_css_token_start(first_char)) {
|
||||
/* TODO: Better composing of error tokens. For now we just
|
||||
* split them down into char tokens */
|
||||
|
||||
} else if (first_char == '#') {
|
||||
/* Check whether it is hexcolor or hash token */
|
||||
if (is_css_hexdigit(*string)) {
|
||||
int hexdigits;
|
||||
|
||||
scan_css(scanner, string, CSS_CHAR_HEX_DIGIT);
|
||||
|
||||
/* Check that the hexdigit sequence is either 3 or 6
|
||||
* chars and it isn't just start of some non-hex ident
|
||||
* string. */
|
||||
hexdigits = string - token->string - 1;
|
||||
if ((hexdigits == 3 || hexdigits == 6)
|
||||
&& !is_css_ident(*string)) {
|
||||
type = CSS_TOKEN_HEX_COLOR;
|
||||
} else {
|
||||
scan_css(scanner, string, CSS_CHAR_IDENT);
|
||||
type = CSS_TOKEN_HASH;
|
||||
}
|
||||
|
||||
} else if (is_css_ident(*string)) {
|
||||
/* Not *_ident_start() because hashes are #<name>. */
|
||||
scan_css(scanner, string, CSS_CHAR_IDENT);
|
||||
type = CSS_TOKEN_HASH;
|
||||
}
|
||||
|
||||
} else if (first_char == '@') {
|
||||
/* Compose token containing @<ident> */
|
||||
if (is_css_ident_start(*string)) {
|
||||
unsigned char *ident = string;
|
||||
|
||||
/* Scan both ident start and ident */
|
||||
scan_css(scanner, string, CSS_CHAR_IDENT);
|
||||
type = map_dom_scanner_string(scanner, ident, string,
|
||||
CSS_TOKEN_AT_KEYWORD);
|
||||
}
|
||||
|
||||
} else if (first_char == '*') {
|
||||
if (*string == '=') {
|
||||
type = CSS_TOKEN_SELECT_CONTAINS;
|
||||
string++;
|
||||
} else {
|
||||
type = CSS_TOKEN_IDENT;
|
||||
}
|
||||
|
||||
} else if (first_char == '^') {
|
||||
if (*string == '=') {
|
||||
type = CSS_TOKEN_SELECT_BEGIN;
|
||||
string++;
|
||||
}
|
||||
|
||||
} else if (first_char == '$') {
|
||||
if (*string == '=') {
|
||||
type = CSS_TOKEN_SELECT_END;
|
||||
string++;
|
||||
}
|
||||
|
||||
} else if (first_char == '|') {
|
||||
if (*string == '=') {
|
||||
type = CSS_TOKEN_SELECT_HYPHEN_LIST;
|
||||
string++;
|
||||
}
|
||||
|
||||
} else if (first_char == '!') {
|
||||
scan_css(scanner, string, CSS_CHAR_WHITESPACE);
|
||||
if (!strncasecmp(string, "important", 9)) {
|
||||
type = CSS_TOKEN_IMPORTANT;
|
||||
string += 9;
|
||||
}
|
||||
|
||||
} else if (isquote(first_char)) {
|
||||
/* TODO: Escaped delimiters --jonas */
|
||||
int size = scanner->end - string;
|
||||
unsigned char *string_end = memchr(string, first_char, size);
|
||||
|
||||
if (string_end) {
|
||||
/* We don't want the delimiters in the token */
|
||||
token->string++;
|
||||
real_length = string_end - token->string;
|
||||
string = string_end + 1;
|
||||
type = CSS_TOKEN_STRING;
|
||||
}
|
||||
|
||||
} else if (first_char == '<' || first_char == '-') {
|
||||
/* Try to navigate SGML tagsoup */
|
||||
|
||||
if (*string == '/') {
|
||||
/* Some kind of SGML tag end ... better bail out screaming */
|
||||
type = CSS_TOKEN_NONE;
|
||||
|
||||
} else {
|
||||
unsigned char *sgml = string;
|
||||
|
||||
/* Skip anything looking like SGML "<!--" and "-->"
|
||||
* comments + <![CDATA[ and ]]> notations. */
|
||||
scan_css(scanner, sgml, CSS_CHAR_SGML_MARKUP);
|
||||
|
||||
if (sgml - string >= 2
|
||||
&& ((first_char == '<' && *string == '!')
|
||||
|| (first_char == '-' && sgml[-1] == '>'))) {
|
||||
type = CSS_TOKEN_SKIP;
|
||||
string = sgml;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (first_char == '/') {
|
||||
/* Comments */
|
||||
if (*string == '*') {
|
||||
type = CSS_TOKEN_SKIP;
|
||||
|
||||
for (string++; string < scanner->end; string++)
|
||||
if (*string == '*' && string[1] == '/') {
|
||||
string += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
INTERNAL("Someone forgot to put code for recognizing tokens "
|
||||
"which start with '%c'.", first_char);
|
||||
}
|
||||
|
||||
token->type = type;
|
||||
token->length = real_length > 0 ? real_length : string - token->string;
|
||||
token->precedence = get_css_precedence(type);
|
||||
scanner->position = string;
|
||||
}
|
||||
|
||||
static struct dom_scanner_token *
|
||||
scan_css_tokens(struct dom_scanner *scanner)
|
||||
{
|
||||
struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
|
||||
struct dom_scanner_token *current;
|
||||
|
||||
if (!begin_dom_token_scanning(scanner))
|
||||
return get_dom_scanner_token(scanner);
|
||||
|
||||
/* Scan tokens until we fill the table */
|
||||
for (current = scanner->table + scanner->tokens;
|
||||
current < table_end && scanner->position < scanner->end;
|
||||
current++) {
|
||||
scan_css(scanner, scanner->position, CSS_CHAR_WHITESPACE);
|
||||
if (scanner->position >= scanner->end) break;
|
||||
|
||||
scan_css_token(scanner, current);
|
||||
|
||||
/* Did some one scream for us to end the madness? */
|
||||
if (current->type == CSS_TOKEN_NONE) {
|
||||
scanner->position = NULL;
|
||||
current--;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Shall we scratch this token? */
|
||||
if (current->type == CSS_TOKEN_SKIP) {
|
||||
current--;
|
||||
}
|
||||
}
|
||||
|
||||
return end_dom_token_scanning(scanner, current);
|
||||
}
|
112
src/dom/css/scanner.h
Normal file
112
src/dom/css/scanner.h
Normal file
@ -0,0 +1,112 @@
|
||||
|
||||
#ifndef EL__DOM_CSS_SCANNER_H
|
||||
#define EL__DOM_CSS_SCANNER_H
|
||||
|
||||
#include "dom/scanner.h"
|
||||
|
||||
/* The various token types and what they contain. Patterns taken from
|
||||
* the flex scanner declarations in the CSS 2 Specification. */
|
||||
enum css_token_type {
	/* Internal token type used both to mark unused tokens in the scanner
	 * table as invalid and, when scanning, to signal that the scanning
	 * should end. */
	CSS_TOKEN_NONE = 0,

	/* Char tokens: */

	/* Char tokens range from 1 to 255 and have their char value as type,
	 * meaning non char tokens have values from 256 and up. */

	/* Low level string tokens: */

	/* {...} means char group, <...> means token */
	/* {identstart}	[a-z_]|{nonascii} */
	/* {ident}		[a-z0-9_-]|{nonascii} */
	/* <ident>		{identstart}{ident}* */
	/* <name>		{ident}+ */
	/* <number>		[0-9]+|[0-9]*"."[0-9]+ */

	/* Percentage is listed because, although it looks like it is composed
	 * of <number> and '%', floating point numbers are really not allowed
	 * but strtol() will round it down for us ;) */
	CSS_TOKEN_IDENT = 256,	/* <ident> */
	CSS_TOKEN_NUMBER,	/* <number> */
	CSS_TOKEN_PERCENTAGE,	/* <number>% */
	CSS_TOKEN_STRING,	/* Char sequence delimited by matching ' or " */

	/* High level string tokens: */

	/* The various number values; dimension being the most generic */
	CSS_TOKEN_ANGLE,	/* <number>rad, <number>grad or <number>deg */
	CSS_TOKEN_DIMENSION,	/* <number><ident> */
	CSS_TOKEN_EM,		/* <number>em */
	CSS_TOKEN_EX,		/* <number>ex */
	CSS_TOKEN_FREQUENCY,	/* <number>Hz or <number>kHz */
	CSS_TOKEN_LENGTH,	/* <number>{px,cm,mm,in,pt,pc} */
	CSS_TOKEN_TIME,		/* <number>ms or <number>s */

	/* XXX: CSS_TOKEN_HASH conflicts with CSS_TOKEN_HEX_COLOR. Generating
	 * hex color tokens has precedence and the hash token user has to
	 * treat CSS_TOKEN_HASH and CSS_TOKEN_HEX_COLOR alike. */
	CSS_TOKEN_HASH,		/* #<name> */
	CSS_TOKEN_HEX_COLOR,	/* #[0-9a-f]\{3,6} */

	/* For all unknown functions we generate one token containing both the
	 * function name and args so scanning/parsing is easier. Besides we
	 * already check for the ending ')'. */
	/* For known functions where we need several args [like rgb()] we want
	 * to generate tokens for every arg and arg delimiter ( ',' or ')' ).
	 * Because url() is a bit tricky: it can contain both <string> and some
	 * chars that would otherwise probably make the scanner choke, we also
	 * include the arg in that token. Besides it will make things like
	 * 'background' property parsing easier. */
	CSS_TOKEN_FUNCTION,	/* <ident>(<args>) */
	CSS_TOKEN_RGB,		/* rgb( */
	CSS_TOKEN_URL,		/* url(<arg>) */

	/* @-rule symbols */
	CSS_TOKEN_AT_KEYWORD,	/* @<ident> */
	CSS_TOKEN_AT_CHARSET,	/* @charset */
	CSS_TOKEN_AT_FONT_FACE,	/* @font-face */
	CSS_TOKEN_AT_IMPORT,	/* @import */
	CSS_TOKEN_AT_MEDIA,	/* @media */
	CSS_TOKEN_AT_PAGE,	/* @page */

	CSS_TOKEN_IMPORTANT,	/* !<whitespace>important */

	/* TODO: Selector stuff: */
	CSS_TOKEN_SELECT_SPACE_LIST,	/* ~= */
	CSS_TOKEN_SELECT_HYPHEN_LIST,	/* |= */
	CSS_TOKEN_SELECT_BEGIN,		/* ^= */
	CSS_TOKEN_SELECT_END,		/* $= */
	CSS_TOKEN_SELECT_CONTAINS,	/* *= */

	/* Special tokens: */

	/* A special token for unrecognized strings */
	CSS_TOKEN_GARBAGE,

	/* Token type used internally when scanning to signal that the token
	 * should not be recorded in the scanner's token table. */
	CSS_TOKEN_SKIP,
};
|
||||
|
||||
extern struct dom_scanner_info dom_css_scanner_info;
|
||||
|
||||
#define skip_css_tokens(scanner, type) \
|
||||
skip_dom_scanner_tokens(scanner, type, get_css_precedence(type))
|
||||
|
||||
/* Precedence of a token type: only the char tokens '}', '{', ';' and ')'
 * have a non-zero precedence, in that descending order. */
#define get_css_precedence(token_type) \
	((token_type) == '}' ? (1 << 10) : \
	 (token_type) == '{' ? (1 <<  9) : \
	 (token_type) == ';' ? (1 <<  8) : \
	 (token_type) == ')' ? (1 <<  7) : 0)

/* Returns non-zero when a token of type @type may be skipped while looking
 * for @skipto, i.e. when its precedence is strictly lower than that of
 * @skipto. */
static inline int
check_css_precedence(int type, int skipto)
{
	int token_rank = get_css_precedence(type);
	int target_rank = get_css_precedence(skipto);

	return token_rank < target_rank;
}
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
#ifndef EL__DOCUMENT_DOM_DOM_H
|
||||
#define EL__DOCUMENT_DOM_DOM_H
|
||||
#ifndef EL_DOM_DOM_H
|
||||
#define EL_DOM_DOM_H
|
||||
|
||||
enum dom_exception_code {
|
||||
DOM_ERR_NONE = 0,
|
@ -9,8 +9,7 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "intl/charsets.h"
|
||||
#include "dom/node.h"
|
||||
#include "util/hash.h"
|
||||
#include "util/lists.h"
|
||||
#include "util/memory.h"
|
@ -1,8 +1,8 @@
|
||||
|
||||
#ifndef EL__DOCUMENT_DOM_NODE_H
|
||||
#define EL__DOCUMENT_DOM_NODE_H
|
||||
#ifndef EL_DOM_NODE_H
|
||||
#define EL_DOM_NODE_H
|
||||
|
||||
#include "document/dom/string.h"
|
||||
#include "dom/string.h"
|
||||
#include "util/hash.h"
|
||||
|
||||
struct dom_node_list;
|
172
src/dom/scanner.c
Normal file
172
src/dom/scanner.c
Normal file
@ -0,0 +1,172 @@
|
||||
/* A pretty generic scanner */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "dom/scanner.h"
|
||||
#include "util/error.h"
|
||||
#include "util/string.h"
|
||||
|
||||
|
||||
int
|
||||
map_dom_scanner_string(struct dom_scanner *scanner,
|
||||
unsigned char *ident, unsigned char *end, int base_type)
|
||||
{
|
||||
const struct dom_scanner_string_mapping *mappings = scanner->info->mappings;
|
||||
int length = end - ident;
|
||||
|
||||
for (; mappings->name; mappings++) {
|
||||
if (mappings->base_type == base_type
|
||||
&& !strlcasecmp(mappings->name, -1, ident, length))
|
||||
return mappings->type;
|
||||
}
|
||||
|
||||
return base_type;
|
||||
}
|
||||
|
||||
|
||||
struct dom_scanner_token *
|
||||
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence)
|
||||
{
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
|
||||
|
||||
/* Skip tokens while handling some basic precedens of special chars
|
||||
* so we don't skip to long. */
|
||||
while (token) {
|
||||
if (token->type == skipto
|
||||
|| token->precedence > precedence)
|
||||
break;
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
}
|
||||
|
||||
return (token && token->type == skipto)
|
||||
? get_next_dom_scanner_token(scanner) : NULL;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SCANNER
|
||||
void
|
||||
dump_dom_scanner(struct dom_scanner *scanner)
|
||||
{
|
||||
unsigned char buffer[MAX_STR_LEN];
|
||||
struct dom_scanner_token *token = scanner->current;
|
||||
struct dom_scanner_token *table_end = scanner->table + scanner->tokens;
|
||||
unsigned char *srcpos = token->string, *bufpos = buffer;
|
||||
int src_lookahead = 50;
|
||||
int token_lookahead = 4;
|
||||
int srclen;
|
||||
|
||||
if (!dom_scanner_has_tokens(scanner)) return;
|
||||
|
||||
memset(buffer, 0, MAX_STR_LEN);
|
||||
for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
|
||||
int buflen = MAX_STR_LEN - (bufpos - buffer);
|
||||
int added = snprintf(bufpos, buflen, "[%.*s] ", token->length, token->string);
|
||||
|
||||
bufpos += added;
|
||||
}
|
||||
|
||||
if (scanner->tokens > token_lookahead) {
|
||||
memcpy(bufpos, "... ", 4);
|
||||
bufpos += 4;
|
||||
}
|
||||
|
||||
srclen = strlen(srcpos);
|
||||
int_upper_bound(&src_lookahead, srclen);
|
||||
*bufpos++ = '[';
|
||||
|
||||
/* Compress the lookahead string */
|
||||
for (; src_lookahead > 0; src_lookahead--, srcpos++, bufpos++) {
|
||||
if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') {
|
||||
*bufpos++ = '\\';
|
||||
*bufpos = *srcpos == '\n' ? 'n'
|
||||
: (*srcpos == '\r' ? 'r' : 't');
|
||||
} else {
|
||||
*bufpos = *srcpos;
|
||||
}
|
||||
}
|
||||
|
||||
if (srclen > src_lookahead)
|
||||
memcpy(bufpos, "...]", 4);
|
||||
else
|
||||
memcpy(bufpos, "]", 2);
|
||||
|
||||
errfile = scanner->file, errline = scanner->line;
|
||||
elinks_wdebug("%s", buffer);
|
||||
}
|
||||
|
||||
struct dom_scanner_token *
|
||||
get_dom_scanner_token_debug(struct dom_scanner *scanner)
|
||||
{
|
||||
if (!dom_scanner_has_tokens(scanner)) return NULL;
|
||||
|
||||
dump_dom_scanner(scanner);
|
||||
|
||||
/* Make sure we do not return invalid tokens */
|
||||
assert(!dom_scanner_has_tokens(scanner)
|
||||
|| scanner->current->type != 0);
|
||||
|
||||
return get_dom_scanner_token(scanner);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Initializers */
|
||||
|
||||
static inline void
|
||||
init_dom_scanner_info(struct dom_scanner_info *scanner_info)
|
||||
{
|
||||
const struct dom_scan_table_info *info = scanner_info->scan_table_info;
|
||||
int *scan_table = scanner_info->scan_table;
|
||||
int i;
|
||||
|
||||
if (!info) return;
|
||||
|
||||
for (i = 0; info[i].type != DOM_SCAN_END; i++) {
|
||||
const union scan_table_data *data = &info[i].data;
|
||||
|
||||
if (info[i].type == DOM_SCAN_RANGE) {
|
||||
int index = *data->range.start;
|
||||
|
||||
assert(index > 0);
|
||||
assert(data->range.end < DOM_SCAN_TABLE_SIZE);
|
||||
assert(index <= data->range.end);
|
||||
|
||||
for (; index <= data->range.end; index++)
|
||||
scan_table[index] |= info[i].bits;
|
||||
|
||||
} else {
|
||||
unsigned char *string = info[i].data.string.source;
|
||||
int pos = info[i].data.string.length - 1;
|
||||
|
||||
assert(info[i].type == DOM_SCAN_STRING && pos >= 0);
|
||||
|
||||
for (; pos >= 0; pos--)
|
||||
scan_table[string[pos]] |= info[i].bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
|
||||
unsigned char *string, unsigned char *end)
|
||||
{
|
||||
if (!scanner_info->initialized) {
|
||||
init_dom_scanner_info(scanner_info);
|
||||
scanner_info->initialized = 1;
|
||||
}
|
||||
|
||||
memset(scanner, 0, sizeof(*scanner));
|
||||
|
||||
scanner->string = string;
|
||||
scanner->position = string;
|
||||
scanner->end = end ? end : string + strlen(string);
|
||||
scanner->current = scanner->table;
|
||||
scanner->info = scanner_info;
|
||||
scanner->info->scan(scanner);
|
||||
}
|
249
src/dom/scanner.h
Normal file
249
src/dom/scanner.h
Normal file
@ -0,0 +1,249 @@
|
||||
#ifndef EL_DOM_SCANNER_H
|
||||
#define EL_DOM_SCANNER_H
|
||||
|
||||
#include "util/error.h"
|
||||
|
||||
/* Define if you want a talking scanner */
|
||||
/* #define DEBUG_SCANNER */
|
||||
|
||||
/* The {struct dom_scanner_token} describes one scanned token. There are two
 * kinds of tokens: char and non-char tokens. Char tokens contain only one
 * char and simply have their char value as type. They are tokens having
 * special control meaning in the code, like ':', ';', '{', '}' and '*'.
 * Non-char tokens have one or more chars and contain stuff like number or
 * identifier strings. */
struct dom_scanner_token {
	/* The type of the token */
	int type;

	/* Some precedence value */
	int precedence;

	/* The start of the token string ... */
	unsigned char *string;

	/* ... and the token length */
	int length;
};
|
||||
|
||||
/* The naming of these two macros is a bit odd .. we compare often with
|
||||
* "static" strings (I don't have a better word) so the macro name should
|
||||
* be short. --jonas */
|
||||
|
||||
/* Compare the string of @token with @string */
|
||||
#define dom_scanner_token_strlcasecmp(token, str, len) \
|
||||
((token) && !strlcasecmp((token)->string, (token)->length, str, len))
|
||||
|
||||
/* Also compares the token string but using a "static" string */
|
||||
#define dom_scanner_token_contains(token, str) \
|
||||
dom_scanner_token_strlcasecmp(token, str, sizeof(str) - 1)
|
||||
|
||||
|
||||
/* One entry of a scan table description. A DOM_SCAN_STRING entry applies
 * @bits to every character of data.string; a DOM_SCAN_RANGE entry applies
 * them to a range whose first character is given as a string (data.range.start)
 * and whose last character is given as a value (data.range.end);
 * DOM_SCAN_END terminates the list. */
struct dom_scan_table_info {
	enum { DOM_SCAN_RANGE, DOM_SCAN_STRING, DOM_SCAN_END } type;
	union scan_table_data {
		struct { unsigned char *source; long length; } string;
		struct { unsigned char *start;  long end;    } range;
	} data;
	int bits;
};

/* Number of slots in a scan table: one per possible byte value. */
#define DOM_SCAN_TABLE_SIZE	256

/* Generic entry initializer. The two data values initialize the first union
 * member; both members have the same member types, which is what lets the
 * range initializer share it. */
#define DOM_SCAN_TABLE_INFO(type, data1, data2, bits) \
	{ (type), { { (data1), (data2) } }, (bits) }

/* Entry for the characters from the first char of @from up to @to. */
#define DOM_SCAN_TABLE_RANGE(from, to, bits) \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_RANGE, from, to, bits)

/* Entry for every character of the string literal (or array) @str. */
#define DOM_SCAN_TABLE_STRING(str, bits) \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_STRING, str, sizeof(str) - 1, bits)

/* List terminator. */
#define DOM_SCAN_TABLE_END \
	DOM_SCAN_TABLE_INFO(DOM_SCAN_END, 0, 0, 0)
|
||||
|
||||
/* Maps a string to a token type within a family of tokens. */
struct dom_scanner_string_mapping {
	/* The string to match; NULL marks the end of a mapping table */
	unsigned char *name;

	/* The token type produced when @name matches */
	int type;

	/* The generic token type this mapping refines */
	int base_type;
};
|
||||
|
||||
struct dom_scanner;
|
||||
|
||||
struct dom_scanner_info {
|
||||
/* Table containing how to map strings to token types */
|
||||
const struct dom_scanner_string_mapping *mappings;
|
||||
|
||||
/* Information for how to initialize the scanner table */
|
||||
const struct dom_scan_table_info *scan_table_info;
|
||||
|
||||
/* Fills the scanner with tokens. Already scanned tokens which have not
|
||||
* been requested remain and are moved to the start of the scanners
|
||||
* token table. */
|
||||
/* Returns the current token or NULL if there are none. */
|
||||
struct dom_scanner_token *(*scan)(struct dom_scanner *scanner);
|
||||
|
||||
/* The scanner table */
|
||||
/* Contains bitmaps for the various characters groups.
|
||||
* Idea sync'ed from mozilla browser. */
|
||||
int scan_table[DOM_SCAN_TABLE_SIZE];
|
||||
|
||||
/* Has the scanner info been initialized? */
|
||||
unsigned int initialized:1;
|
||||
};
|
||||
|
||||
|
||||
/* Initializes the scanner. */
|
||||
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
|
||||
unsigned char *string, unsigned char *end);
|
||||
|
||||
/* The number of tokens in the scanners token table:
|
||||
* At best it should be big enough to contain properties with space separated
|
||||
* values and function calls with up to 3 variables like rgb(). At worst it
|
||||
* should be no less than 2 in order to be able to peek at the next token in
|
||||
* the scanner. */
|
||||
#define DOM_SCANNER_TOKENS 10
|
||||
|
||||
/* The {struct dom_scanner} describes the current state of the scanner. */
|
||||
struct dom_scanner {
|
||||
/* The very start of the scanned string, the position in the string
|
||||
* where to scan next and the end of the string. If position is NULL it
|
||||
* means that no more tokens can be retrieved from the string. */
|
||||
unsigned char *string, *position, *end;
|
||||
|
||||
/* The current token and number of scanned tokens in the table.
|
||||
* If the number of scanned tokens is less than DOM_SCANNER_TOKENS it
|
||||
* is because there are no more tokens in the string. */
|
||||
struct dom_scanner_token *current;
|
||||
int tokens;
|
||||
|
||||
/* The 'meta' scanner information */
|
||||
struct dom_scanner_info *info;
|
||||
|
||||
#ifdef DEBUG_SCANNER
|
||||
/* Debug info about the caller. */
|
||||
unsigned char *file;
|
||||
int line;
|
||||
#endif
|
||||
|
||||
/* Some state indicator only meaningful to the scanner internals */
|
||||
int state;
|
||||
|
||||
/* The table contain already scanned tokens. It is maintained in
|
||||
* order to optimize the scanning a bit and make it possible to look
|
||||
* ahead at the next token. You should always use the accessors
|
||||
* (defined below) for getting tokens from the scanner. */
|
||||
struct dom_scanner_token table[DOM_SCANNER_TOKENS];
|
||||
};
|
||||
|
||||
/* Non-zero while the scanner's token count is positive and its current token
 * pointer lies before table + tokens, i.e. while there is a current token to
 * hand out. Note that @scanner is evaluated more than once. */
#define dom_scanner_has_tokens(scanner) \
|
||||
((scanner)->tokens > 0 && (scanner)->current < (scanner)->table + (scanner)->tokens)
|
||||
|
||||
/* Checks that the current scanner state is valid: either the token table is
 * not full (fewer than DOM_SCANNER_TOKENS tokens were scanned), or at least
 * one more token follows the current one, so that the last token skip or
 * get_next_dom_scanner_token() call left the type of the next token
 * available. The argument is parenthesized so that any pointer expression
 * (e.g. &scanner) can be passed; note that it may be evaluated more than
 * once. */
#define check_dom_scanner(scanner) \
	((scanner)->tokens < DOM_SCANNER_TOKENS \
	 || (scanner)->current + 1 < (scanner)->table + (scanner)->tokens)
|
||||
|
||||
|
||||
/* Scanner table accessors and mutators */
|
||||
|
||||
/* Checks whether the token following the current one is of type @token_type.
 * Evaluates to zero when the scanner has no tokens or when no token follows
 * the current one in the table. Uses dom_scanner_has_tokens(), the macro
 * defined by this header, rather than the scanner_has_tokens() name from the
 * generic scanner. Note that @scanner is evaluated more than once. */
#define check_next_dom_scanner_token(scanner, token_type) \
	(dom_scanner_has_tokens(scanner) \
	 && ((scanner)->current + 1 < (scanner)->table + (scanner)->tokens) \
	 && (scanner)->current[1].type == (token_type))
|
||||
|
||||
/* Access current and next token. Getting the next token might cause
|
||||
* a rescan so any token pointers that has been stored in a local variable
|
||||
* might not be valid after the call. */
|
||||
static inline struct dom_scanner_token *
|
||||
get_dom_scanner_token(struct dom_scanner *scanner)
|
||||
{
|
||||
return dom_scanner_has_tokens(scanner) ? scanner->current : NULL;
|
||||
}
|
||||
|
||||
/* Do a scanning if we do not have also have access to next token. */
|
||||
static inline struct dom_scanner_token *
|
||||
get_next_dom_scanner_token(struct dom_scanner *scanner)
|
||||
{
|
||||
return (dom_scanner_has_tokens(scanner)
|
||||
&& (++scanner->current + 1 >= scanner->table + scanner->tokens)
|
||||
? scanner->info->scan(scanner) : get_dom_scanner_token(scanner));
|
||||
}
|
||||
|
||||
/* Alias of get_next_dom_scanner_token() for call sites that only want to advance past the current token. */
|
||||
#define skip_dom_scanner_token(scanner) get_next_dom_scanner_token(scanner)
|
||||
|
||||
/* Removes tokens from the scanner until it meets a token of the given type.
|
||||
* This token will then also be skipped. */
|
||||
struct dom_scanner_token *
|
||||
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence);
|
||||
|
||||
/* Looks up the string from @ident to @end in the scanner's string mapping
|
||||
* table */
|
||||
int
|
||||
map_dom_scanner_string(struct dom_scanner *scanner,
|
||||
unsigned char *ident, unsigned char *end, int base_type);
|
||||
|
||||
#ifdef DEBUG_DOM_SCANNER
|
||||
void dump_dom_scanner(struct dom_scanner *scanner);
|
||||
#endif
|
||||
|
||||
/* The begin_dom_token_scanning() and end_dom_token_scanning() functions provide the
|
||||
* basic setup and teardown for the rescan function made public via the
|
||||
* dom_scanner_info->scan member. */
|
||||
|
||||
/* Returns NULL if it is not necessary to try to scan for more tokens */
|
||||
static inline struct dom_scanner_token *
|
||||
begin_dom_token_scanning(struct dom_scanner *scanner)
|
||||
{
|
||||
struct dom_scanner_token *table = scanner->table;
|
||||
struct dom_scanner_token *table_end = table + scanner->tokens;
|
||||
int move_to_front = int_max(table_end - scanner->current, 0);
|
||||
struct dom_scanner_token *current = move_to_front ? scanner->current : table;
|
||||
size_t moved_size = 0;
|
||||
|
||||
assert(scanner->current);
|
||||
|
||||
/* Move any untouched tokens */
|
||||
if (move_to_front) {
|
||||
moved_size = move_to_front * sizeof(*table);
|
||||
memmove(table, current, moved_size);
|
||||
current = &table[move_to_front];
|
||||
}
|
||||
|
||||
/* Clear all unused tokens */
|
||||
memset(current, 0, sizeof(*table) * DOM_SCANNER_TOKENS - moved_size);
|
||||
|
||||
if (!scanner->position) {
|
||||
scanner->tokens = move_to_front ? move_to_front : -1;
|
||||
scanner->current = table;
|
||||
assert(check_dom_scanner(scanner));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
scanner->tokens = move_to_front;
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
/* Updates the @scanner struct after scanning has been done. The position
|
||||
* _after_ the last valid token is taken as the @end argument. */
|
||||
/* It is ok for @end to be < scanner->table since scanner->tokens will become
|
||||
* <= 0 anyway. */
|
||||
static inline struct dom_scanner_token *
|
||||
end_dom_token_scanning(struct dom_scanner *scanner, struct dom_scanner_token *end)
|
||||
{
|
||||
assert(end <= scanner->table + DOM_SCANNER_TOKENS);
|
||||
|
||||
scanner->tokens = (end - scanner->table);
|
||||
scanner->current = scanner->table;
|
||||
if (scanner->position >= scanner->end)
|
||||
scanner->position = NULL;
|
||||
|
||||
assert(check_dom_scanner(scanner));
|
||||
|
||||
return get_dom_scanner_token(scanner);
|
||||
}
|
||||
|
||||
#endif
|
@ -6,13 +6,13 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/css/scanner.h"
|
||||
#include "document/dom/dom.h"
|
||||
#include "document/dom/node.h"
|
||||
#include "document/dom/select.h"
|
||||
#include "document/dom/stack.h"
|
||||
#include "dom/css/scanner.h"
|
||||
#include "dom/dom.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/scanner.h"
|
||||
#include "dom/select.h"
|
||||
#include "dom/stack.h"
|
||||
#include "util/memory.h"
|
||||
#include "util/scanner.h"
|
||||
#include "util/string.h"
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
|
||||
/* Maps the content of a scanner token to a pseudo-class or -element ID. */
|
||||
static enum dom_select_pseudo
|
||||
get_dom_select_pseudo(struct scanner_token *token)
|
||||
get_dom_select_pseudo(struct dom_scanner_token *token)
|
||||
{
|
||||
static struct {
|
||||
struct dom_string string;
|
||||
@ -84,9 +84,9 @@ get_dom_select_pseudo(struct scanner_token *token)
|
||||
|
||||
/* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
|
||||
static enum dom_exception_code
|
||||
parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
parse_dom_select_attribute(struct dom_select_node *sel, struct dom_scanner *scanner)
|
||||
{
|
||||
struct scanner_token *token = get_scanner_token(scanner);
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
|
||||
|
||||
/* Get '['. */
|
||||
|
||||
@ -95,7 +95,7 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
|
||||
/* Get the attribute name. */
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token || token->type != CSS_TOKEN_IDENT)
|
||||
return DOM_ERR_SYNTAX;
|
||||
|
||||
@ -103,7 +103,7 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
|
||||
/* Get the optional '=' combo or ending ']'. */
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
|
||||
switch (token->type) {
|
||||
@ -137,7 +137,7 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
|
||||
/* Get the required value. */
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
|
||||
switch (token->type) {
|
||||
@ -152,7 +152,7 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
|
||||
/* Get the ending ']'. */
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (token && token->type == ']')
|
||||
return DOM_ERR_NONE;
|
||||
|
||||
@ -170,9 +170,9 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
|
||||
* 0n+0
|
||||
*/
|
||||
|
||||
/* FIXME: Move somewhere else? util/scanner.h? */
|
||||
/* FIXME: Move somewhere else? dom/scanner.h? */
|
||||
static size_t
|
||||
get_scanner_token_number(struct scanner_token *token)
|
||||
get_scanner_token_number(struct dom_scanner_token *token)
|
||||
{
|
||||
size_t number = 0;
|
||||
|
||||
@ -194,26 +194,26 @@ get_scanner_token_number(struct scanner_token *token)
|
||||
|
||||
/* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */
|
||||
static enum dom_exception_code
|
||||
parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scanner)
|
||||
parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct dom_scanner *scanner)
|
||||
{
|
||||
struct scanner_token *token = get_next_scanner_token(scanner);
|
||||
struct dom_scanner_token *token = get_next_dom_scanner_token(scanner);
|
||||
int sign = 1;
|
||||
int number = -1;
|
||||
|
||||
if (!token || token->type != '(')
|
||||
return DOM_ERR_SYNTAX;
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token)
|
||||
return DOM_ERR_SYNTAX;
|
||||
|
||||
switch (token->type) {
|
||||
case CSS_TOKEN_IDENT:
|
||||
if (scanner_token_contains(token, "even")) {
|
||||
if (dom_scanner_token_contains(token, "even")) {
|
||||
nth->step = 2;
|
||||
nth->index = 0;
|
||||
|
||||
} else if (scanner_token_contains(token, "odd")) {
|
||||
} else if (dom_scanner_token_contains(token, "odd")) {
|
||||
nth->step = 2;
|
||||
nth->index = 1;
|
||||
|
||||
@ -230,7 +230,7 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
|
||||
case '-':
|
||||
sign = -1;
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
|
||||
if (token->type != CSS_TOKEN_IDENT)
|
||||
@ -245,7 +245,7 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
|
||||
if (number < 0)
|
||||
return DOM_ERR_INVALID_STATE;
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
break;
|
||||
|
||||
@ -256,18 +256,18 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
|
||||
/* The rest can contain n+ part */
|
||||
switch (token->type) {
|
||||
case CSS_TOKEN_IDENT:
|
||||
if (!scanner_token_contains(token, "n"))
|
||||
if (!dom_scanner_token_contains(token, "n"))
|
||||
return DOM_ERR_SYNTAX;
|
||||
|
||||
nth->step = sign * number;
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
|
||||
if (token->type != '+')
|
||||
break;
|
||||
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (!token) return DOM_ERR_SYNTAX;
|
||||
|
||||
if (token->type != CSS_TOKEN_NUMBER)
|
||||
@ -294,15 +294,15 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
|
||||
/* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
|
||||
static enum dom_exception_code
|
||||
parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
|
||||
struct scanner *scanner)
|
||||
struct dom_scanner *scanner)
|
||||
{
|
||||
struct scanner_token *token = get_scanner_token(scanner);
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
|
||||
enum dom_select_pseudo pseudo;
|
||||
enum dom_exception_code code;
|
||||
|
||||
/* Skip double :'s in front of some pseudo's (::first-line, etc.) */
|
||||
do {
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
} while (token && token->type == ':');
|
||||
|
||||
if (!token || token->type != CSS_TOKEN_IDENT)
|
||||
@ -391,15 +391,15 @@ static enum dom_exception_code
|
||||
parse_dom_select(struct dom_select *select, struct dom_stack *stack,
|
||||
unsigned char *string, int length)
|
||||
{
|
||||
struct scanner scanner;
|
||||
struct dom_scanner scanner;
|
||||
struct dom_select_node sel;
|
||||
|
||||
init_scanner(&scanner, &css_scanner_info, string, string + length);
|
||||
init_dom_scanner(&scanner, &dom_css_scanner_info, string, string + length);
|
||||
|
||||
memset(&sel, 0, sizeof(sel));
|
||||
|
||||
while (scanner_has_tokens(&scanner)) {
|
||||
struct scanner_token *token = get_scanner_token(&scanner);
|
||||
while (dom_scanner_has_tokens(&scanner)) {
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
|
||||
enum dom_exception_code code;
|
||||
struct dom_select_node *select_node;
|
||||
|
||||
@ -438,7 +438,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
|
||||
break;
|
||||
|
||||
case '.':
|
||||
token = get_next_scanner_token(&scanner);
|
||||
token = get_next_dom_scanner_token(&scanner);
|
||||
if (!token || token->type != CSS_TOKEN_IDENT)
|
||||
return DOM_ERR_SYNTAX;
|
||||
|
||||
@ -476,7 +476,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
|
||||
return DOM_ERR_SYNTAX;
|
||||
}
|
||||
|
||||
skip_scanner_token(&scanner);
|
||||
skip_dom_scanner_token(&scanner);
|
||||
|
||||
if (sel.node.type == DOM_NODE_UNKNOWN)
|
||||
continue;
|
@ -1,7 +1,7 @@
|
||||
#ifndef EL__DOCUMENT_DOM_SELECT_H
|
||||
#define EL__DOCUMENT_DOM_SELECT_H
|
||||
#ifndef EL_DOM_SELECT_H
|
||||
#define EL_DOM_SELECT_H
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "dom/node.h"
|
||||
|
||||
|
||||
/* FIXME: Namespaces; *|E */
|
@ -9,8 +9,8 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/sgml/html/html.h"
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/sgml/html/html.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
|
||||
|
||||
#define HTML_(node, name, id) SGML_NODE_INFO(HTML, node, name, id)
|
||||
@ -20,13 +20,13 @@
|
||||
static struct sgml_node_info html_attributes[HTML_ATTRIBUTES] = {
|
||||
SGML_NODE_HEAD(HTML, ATTRIBUTE),
|
||||
|
||||
#include "document/sgml/html/attribute.inc"
|
||||
#include "dom/sgml/html/attribute.inc"
|
||||
};
|
||||
|
||||
static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
|
||||
SGML_NODE_HEAD(HTML, ELEMENT),
|
||||
|
||||
#include "document/sgml/html/element.inc"
|
||||
#include "dom/sgml/html/element.inc"
|
||||
};
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
|
||||
#ifndef EL__DOCUMENT_SGML_HTML_HTML_H
|
||||
#define EL__DOCUMENT_SGML_HTML_HTML_H
|
||||
#ifndef EL_DOM_SGML_HTML_HTML_H
|
||||
#define EL_DOM_SGML_HTML_HTML_H
|
||||
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
|
||||
extern struct sgml_info sgml_html_info;
|
||||
|
||||
@ -13,7 +13,7 @@ extern struct sgml_info sgml_html_info;
|
||||
enum html_element_type {
|
||||
HTML_ELEMENT_UNKNOWN,
|
||||
|
||||
#include "document/sgml/html/element.inc"
|
||||
#include "dom/sgml/html/element.inc"
|
||||
|
||||
HTML_ELEMENTS,
|
||||
};
|
||||
@ -21,7 +21,7 @@ enum html_element_type {
|
||||
enum html_attribute_type {
|
||||
HTML_ATTRIBUTE_UNKNOWN,
|
||||
|
||||
#include "document/sgml/html/attribute.inc"
|
||||
#include "dom/sgml/html/attribute.inc"
|
||||
|
||||
HTML_ATTRIBUTES,
|
||||
};
|
@ -9,11 +9,11 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "document/dom/stack.h"
|
||||
#include "document/sgml/parser.h"
|
||||
#include "document/sgml/scanner.h"
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/stack.h"
|
||||
#include "dom/sgml/parser.h"
|
||||
#include "dom/sgml/scanner.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
#include "protocol/uri.h"
|
||||
#include "util/error.h"
|
||||
#include "util/lists.h"
|
||||
@ -51,7 +51,7 @@ add_sgml_document(struct dom_stack *stack, struct uri *uri)
|
||||
}
|
||||
|
||||
static inline struct dom_node *
|
||||
add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
|
||||
add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
|
||||
{
|
||||
struct sgml_parser *parser = get_sgml_parser(stack);
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
@ -81,7 +81,7 @@ add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
|
||||
|
||||
static inline void
|
||||
add_sgml_attribute(struct dom_stack *stack,
|
||||
struct scanner_token *token, struct scanner_token *valtoken)
|
||||
struct dom_scanner_token *token, struct dom_scanner_token *valtoken)
|
||||
{
|
||||
struct sgml_parser *parser = get_sgml_parser(stack);
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
@ -109,7 +109,7 @@ add_sgml_attribute(struct dom_stack *stack,
|
||||
}
|
||||
|
||||
static inline struct dom_node *
|
||||
add_sgml_proc_instruction(struct dom_stack *stack, struct scanner_token *token)
|
||||
add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *token)
|
||||
{
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
struct dom_node *node;
|
||||
@ -147,7 +147,7 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct scanner_token *token)
|
||||
}
|
||||
|
||||
static inline void
|
||||
add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct scanner_token *token)
|
||||
add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token)
|
||||
{
|
||||
struct dom_node *parent = get_dom_stack_top(stack)->node;
|
||||
struct dom_node *node = add_dom_node(parent, type, token->string, token->length);
|
||||
@ -165,24 +165,24 @@ add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct scanner_t
|
||||
/* SGML parser main handling: */
|
||||
|
||||
static inline void
|
||||
parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
|
||||
parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
{
|
||||
struct scanner_token name;
|
||||
struct dom_scanner_token name;
|
||||
|
||||
assert(scanner_has_tokens(scanner)
|
||||
&& (get_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
|
||||
|| get_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML));
|
||||
assert(dom_scanner_has_tokens(scanner)
|
||||
&& (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
|
||||
|| get_dom_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML));
|
||||
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
|
||||
while (scanner_has_tokens(scanner)) {
|
||||
struct scanner_token *token = get_scanner_token(scanner);
|
||||
while (dom_scanner_has_tokens(scanner)) {
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
|
||||
|
||||
assert(token);
|
||||
|
||||
switch (token->type) {
|
||||
case SGML_TOKEN_TAG_END:
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
/* and return */
|
||||
case SGML_TOKEN_ELEMENT:
|
||||
case SGML_TOKEN_ELEMENT_BEGIN:
|
||||
@ -194,11 +194,11 @@ parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
|
||||
copy_struct(&name, token);
|
||||
|
||||
/* Skip the attribute name token */
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (token && token->type == '=') {
|
||||
/* If the token is not a valid value token
|
||||
* ignore it. */
|
||||
token = get_next_scanner_token(scanner);
|
||||
token = get_next_dom_scanner_token(scanner);
|
||||
if (token
|
||||
&& token->type != SGML_TOKEN_IDENT
|
||||
&& token->type != SGML_TOKEN_ATTRIBUTE
|
||||
@ -212,28 +212,28 @@ parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
|
||||
|
||||
/* Skip the value token */
|
||||
if (token)
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
default:
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
|
||||
parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
{
|
||||
while (scanner_has_tokens(scanner)) {
|
||||
struct scanner_token *token = get_scanner_token(scanner);
|
||||
while (dom_scanner_has_tokens(scanner)) {
|
||||
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
|
||||
|
||||
switch (token->type) {
|
||||
case SGML_TOKEN_ELEMENT:
|
||||
case SGML_TOKEN_ELEMENT_BEGIN:
|
||||
if (!add_sgml_element(stack, token)) {
|
||||
if (token->type == SGML_TOKEN_ELEMENT) {
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -244,14 +244,14 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
|
||||
if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
|
||||
parse_sgml_attributes(stack, scanner);
|
||||
} else {
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_ELEMENT_EMPTY_END:
|
||||
pop_dom_node(stack);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_ELEMENT_END:
|
||||
@ -273,12 +273,12 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
|
||||
pop_dom_state(stack, state);
|
||||
}
|
||||
}
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_COMMENT:
|
||||
add_sgml_node(stack, DOM_NODE_COMMENT, token);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_ATTLIST:
|
||||
@ -286,12 +286,12 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
|
||||
case SGML_TOKEN_NOTATION_ELEMENT:
|
||||
case SGML_TOKEN_NOTATION_ENTITY:
|
||||
case SGML_TOKEN_NOTATION:
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_CDATA_SECTION:
|
||||
add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_PROCESS_XML:
|
||||
@ -306,19 +306,19 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
|
||||
|
||||
case SGML_TOKEN_PROCESS:
|
||||
add_sgml_proc_instruction(stack, token);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_ENTITY:
|
||||
add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_SPACE:
|
||||
case SGML_TOKEN_TEXT:
|
||||
default:
|
||||
add_sgml_node(stack, DOM_NODE_TEXT, token);
|
||||
skip_scanner_token(scanner);
|
||||
skip_dom_scanner_token(scanner);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -365,7 +365,7 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
|
||||
parsing->depth = parser->stack.depth;
|
||||
get_dom_stack_top(&parser->stack)->immutable = 1;
|
||||
init_scanner(&parsing->scanner, &sgml_scanner_info, source, end);
|
||||
init_dom_scanner(&parsing->scanner, &sgml_scanner_info, source, end);
|
||||
}
|
||||
|
||||
static void
|
@ -1,11 +1,11 @@
|
||||
|
||||
#ifndef EL__DOCUMENT_SGML_PARSER_H
|
||||
#define EL__DOCUMENT_SGML_PARSER_H
|
||||
#ifndef EL_DOM_SGML_PARSER_H
|
||||
#define EL_DOM_SGML_PARSER_H
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "document/dom/stack.h"
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "util/scanner.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/stack.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
#include "dom/scanner.h"
|
||||
|
||||
struct string;
|
||||
struct uri;
|
||||
@ -27,7 +27,7 @@ enum sgml_parser_type {
|
||||
* used to feed output of stuff like ECMAScripts document.write() from
|
||||
* <script>-elements back to the SGML parser. */
|
||||
struct sgml_parsing_state {
|
||||
struct scanner scanner;
|
||||
struct dom_scanner scanner;
|
||||
struct dom_node *node;
|
||||
size_t depth;
|
||||
};
|
||||
@ -48,7 +48,7 @@ struct sgml_parser_state {
|
||||
struct sgml_node_info *info;
|
||||
/* This is used by the DOM source renderer for highlighting the
|
||||
* end-tag of an element. */
|
||||
struct scanner_token end_token;
|
||||
struct dom_scanner_token end_token;
|
||||
};
|
||||
|
||||
struct sgml_parser *
|
@ -9,8 +9,8 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/sgml/rss/rss.h"
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/sgml/rss/rss.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
|
||||
|
||||
#define RSS_(node, name, id) SGML_NODE_INFO(RSS, node, name, id)
|
||||
@ -18,13 +18,13 @@
|
||||
static struct sgml_node_info rss_attributes[RSS_ATTRIBUTES] = {
|
||||
SGML_NODE_HEAD(RSS, ATTRIBUTE),
|
||||
|
||||
#include "document/sgml/rss/attribute.inc"
|
||||
#include "dom/sgml/rss/attribute.inc"
|
||||
};
|
||||
|
||||
static struct sgml_node_info rss_elements[RSS_ELEMENTS] = {
|
||||
SGML_NODE_HEAD(RSS, ELEMENT),
|
||||
|
||||
#include "document/sgml/rss/element.inc"
|
||||
#include "dom/sgml/rss/element.inc"
|
||||
};
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef EL__DOCUMENT_SGML_RSS_RSS_H
|
||||
#define EL__DOCUMENT_SGML_RSS_RSS_H
|
||||
#ifndef EL_DOM_SGML_RSS_RSS_H
|
||||
#define EL_DOM_SGML_RSS_RSS_H
|
||||
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
|
||||
extern struct sgml_info sgml_rss_info;
|
||||
|
||||
@ -10,7 +10,7 @@ extern struct sgml_info sgml_rss_info;
|
||||
enum rss_element_type {
|
||||
RSS_ELEMENT_UNKNOWN,
|
||||
|
||||
#include "document/sgml/rss/element.inc"
|
||||
#include "dom/sgml/rss/element.inc"
|
||||
|
||||
RSS_ELEMENTS,
|
||||
};
|
||||
@ -18,7 +18,7 @@ enum rss_element_type {
|
||||
enum rss_attribute_type {
|
||||
RSS_ATTRIBUTE_UNKNOWN,
|
||||
|
||||
#include "document/sgml/rss/attribute.inc"
|
||||
#include "dom/sgml/rss/attribute.inc"
|
||||
|
||||
RSS_ATTRIBUTES,
|
||||
};
|
@ -9,9 +9,9 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/sgml/scanner.h"
|
||||
#include "dom/sgml/scanner.h"
|
||||
#include "util/error.h"
|
||||
#include "util/scanner.h"
|
||||
#include "dom/scanner.h"
|
||||
#include "util/string.h"
|
||||
|
||||
|
||||
@ -34,24 +34,24 @@ enum sgml_char_group {
|
||||
SGML_CHAR_NOT_ATTRIBUTE = (1 << 6),
|
||||
};
|
||||
|
||||
static struct scan_table_info sgml_scan_table_info[] = {
|
||||
SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
static struct dom_scan_table_info sgml_scan_table_info[] = {
|
||||
DOM_SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
DOM_SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
DOM_SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
/* For the octal number impaired (me including) \241 is 161 --jonas */
|
||||
SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
DOM_SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
|
||||
SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
|
||||
SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
|
||||
SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
|
||||
SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
|
||||
SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
|
||||
DOM_SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
|
||||
DOM_SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
|
||||
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
|
||||
DOM_SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
|
||||
DOM_SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
|
||||
DOM_SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
|
||||
|
||||
SCAN_TABLE_END,
|
||||
DOM_SCAN_TABLE_END,
|
||||
};
|
||||
|
||||
static struct scanner_string_mapping sgml_string_mappings[] = {
|
||||
static struct dom_scanner_string_mapping sgml_string_mappings[] = {
|
||||
{ "--", SGML_TOKEN_NOTATION_COMMENT, SGML_TOKEN_NOTATION },
|
||||
{ "ATTLIST", SGML_TOKEN_NOTATION_ATTLIST, SGML_TOKEN_NOTATION },
|
||||
{ "DOCTYPE", SGML_TOKEN_NOTATION_DOCTYPE, SGML_TOKEN_NOTATION },
|
||||
@ -63,9 +63,9 @@ static struct scanner_string_mapping sgml_string_mappings[] = {
|
||||
{ NULL, SGML_TOKEN_NONE, SGML_TOKEN_NONE },
|
||||
};
|
||||
|
||||
static struct scanner_token *scan_sgml_tokens(struct scanner *scanner);
|
||||
static struct dom_scanner_token *scan_sgml_tokens(struct dom_scanner *scanner);
|
||||
|
||||
struct scanner_info sgml_scanner_info = {
|
||||
struct dom_scanner_info sgml_scanner_info = {
|
||||
sgml_string_mappings,
|
||||
sgml_scan_table_info,
|
||||
scan_sgml_tokens,
|
||||
@ -91,7 +91,7 @@ struct scanner_info sgml_scanner_info = {
|
||||
for (; ((str) < (scanner)->end && *(str) != '<' && *(str) != '&'); (str)++)
|
||||
|
||||
static inline void
|
||||
scan_sgml_text_token(struct scanner *scanner, struct scanner_token *token)
|
||||
scan_sgml_text_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
|
||||
{
|
||||
unsigned char *string = scanner->position;
|
||||
unsigned char first_char = *string;
|
||||
@ -151,7 +151,7 @@ check_sgml_precedence(int type, int skipto)
|
||||
/* XXX: Only element or ``in tag'' precedence is handled correctly however
|
||||
* using this function for CDATA or text would be overkill. */
|
||||
static inline unsigned char *
|
||||
skip_sgml(struct scanner *scanner, unsigned char **string, unsigned char skipto,
|
||||
skip_sgml(struct dom_scanner *scanner, unsigned char **string, unsigned char skipto,
|
||||
int check_quoting)
|
||||
{
|
||||
unsigned char *pos = *string;
|
||||
@ -178,7 +178,7 @@ skip_sgml(struct scanner *scanner, unsigned char **string, unsigned char skipto,
|
||||
}
|
||||
|
||||
static inline int
|
||||
skip_comment(struct scanner *scanner, unsigned char **string)
|
||||
skip_comment(struct dom_scanner *scanner, unsigned char **string)
|
||||
{
|
||||
unsigned char *pos = *string;
|
||||
int length = 0;
|
||||
@ -195,7 +195,7 @@ skip_comment(struct scanner *scanner, unsigned char **string)
|
||||
}
|
||||
|
||||
static inline int
|
||||
skip_cdata_section(struct scanner *scanner, unsigned char **string)
|
||||
skip_cdata_section(struct dom_scanner *scanner, unsigned char **string)
|
||||
{
|
||||
unsigned char *pos = *string;
|
||||
int length = 0;
|
||||
@ -216,7 +216,7 @@ skip_cdata_section(struct scanner *scanner, unsigned char **string)
|
||||
(str)++;
|
||||
|
||||
static inline void
|
||||
scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
|
||||
scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
|
||||
{
|
||||
unsigned char *string = scanner->position;
|
||||
unsigned char first_char = *string;
|
||||
@ -278,7 +278,7 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
|
||||
|
||||
} else {
|
||||
scan_sgml(scanner, string, SGML_CHAR_IDENT);
|
||||
type = map_scanner_string(scanner, ident, string, base);
|
||||
type = map_dom_scanner_string(scanner, ident, string, base);
|
||||
skip_sgml(scanner, &string, '>', 0);
|
||||
}
|
||||
|
||||
@ -291,7 +291,7 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
|
||||
token->string = pos = string;
|
||||
scan_sgml(scanner, string, SGML_CHAR_IDENT);
|
||||
|
||||
type = map_scanner_string(scanner, pos, string, base);
|
||||
type = map_dom_scanner_string(scanner, pos, string, base);
|
||||
|
||||
/* Figure out where the processing instruction ends */
|
||||
for (pos = string; skip_sgml(scanner, &pos, '>', 0); ) {
|
||||
@ -402,14 +402,14 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
|
||||
|
||||
/* Scanner multiplexor */
|
||||
|
||||
static struct scanner_token *
|
||||
scan_sgml_tokens(struct scanner *scanner)
|
||||
static struct dom_scanner_token *
|
||||
scan_sgml_tokens(struct dom_scanner *scanner)
|
||||
{
|
||||
struct scanner_token *table_end = scanner->table + SCANNER_TOKENS;
|
||||
struct scanner_token *current;
|
||||
struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
|
||||
struct dom_scanner_token *current;
|
||||
|
||||
if (!begin_token_scanning(scanner))
|
||||
return get_scanner_token(scanner);
|
||||
if (!begin_dom_token_scanning(scanner))
|
||||
return get_dom_scanner_token(scanner);
|
||||
|
||||
/* Scan tokens until we fill the table */
|
||||
for (current = scanner->table + scanner->tokens;
|
||||
@ -431,5 +431,5 @@ scan_sgml_tokens(struct scanner *scanner)
|
||||
}
|
||||
}
|
||||
|
||||
return end_token_scanning(scanner, current);
|
||||
return end_dom_token_scanning(scanner, current);
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
|
||||
#ifndef EL__DOCUMENT_SGML_SCANNER_H
|
||||
#define EL__DOCUMENT_SGML_SCANNER_H
|
||||
#ifndef EL_DOM_SGML_SCANNER_H
|
||||
#define EL_DOM_SGML_SCANNER_H
|
||||
|
||||
#include "util/scanner.h"
|
||||
#include "dom/scanner.h"
|
||||
|
||||
enum sgml_token_type {
|
||||
/* Char tokens: */
|
||||
@ -56,7 +56,7 @@ enum sgml_token_type {
|
||||
SGML_TOKEN_NONE = 0,
|
||||
};
|
||||
|
||||
extern struct scanner_info sgml_scanner_info;
|
||||
extern struct dom_scanner_info sgml_scanner_info;
|
||||
|
||||
/* Treat '<' as more valuable then '>' so that scanning of '<a<b>' using
|
||||
* skipping to next '>' will stop at the second '<'. */
|
||||
@ -65,6 +65,6 @@ extern struct scanner_info sgml_scanner_info;
|
||||
(token_type) == '>' ? (1 << 10) : 0)
|
||||
|
||||
#define skip_sgml_tokens(scanner, type) \
|
||||
skip_scanner_tokens(scanner, type, get_sgml_precedence(type))
|
||||
skip_dom_scanner_tokens(scanner, type, get_sgml_precedence(type))
|
||||
|
||||
#endif
|
@ -9,15 +9,15 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "document/sgml/sgml.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/sgml/sgml.h"
|
||||
#include "util/error.h"
|
||||
#include "util/string.h"
|
||||
|
||||
/* Backend includes: */
|
||||
|
||||
#include "document/sgml/html/html.h"
|
||||
#include "document/sgml/rss/rss.h"
|
||||
#include "dom/sgml/html/html.h"
|
||||
#include "dom/sgml/rss/rss.h"
|
||||
|
||||
|
||||
int
|
@ -1,11 +1,11 @@
|
||||
|
||||
#ifndef EL__DOCUMENT_SGML_SGML_H
|
||||
#define EL__DOCUMENT_SGML_SGML_H
|
||||
#ifndef EL_DOM_SGML_SGML_H
|
||||
#define EL_DOM_SGML_SGML_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "document/dom/stack.h"
|
||||
#include "document/dom/string.h"
|
||||
#include "dom/stack.h"
|
||||
#include "dom/string.h"
|
||||
|
||||
/* The flags stored in the attribute sgml node info data */
|
||||
/* TODO: Other potential flags (there can be only 16)
|
@ -9,8 +9,8 @@
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "document/dom/stack.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/stack.h"
|
||||
#include "util/memory.h"
|
||||
#include "util/string.h"
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef EL__DOCUMENT_DOM_STACK_H
|
||||
#define EL__DOCUMENT_DOM_STACK_H
|
||||
#ifndef EL_DOM_STACK_H
|
||||
#define EL_DOM_STACK_H
|
||||
|
||||
#include "document/dom/node.h"
|
||||
#include "dom/node.h"
|
||||
#include "util/error.h"
|
||||
#include "util/hash.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef EL__DOCUMENT_DOM_STRING_H
|
||||
#define EL__DOCUMENT_DOM_STRING_H
|
||||
#ifndef EL_DOM_STRING_H
|
||||
#define EL_DOM_STRING_H
|
||||
|
||||
struct dom_string {
|
||||
size_t length;
|
Loading…
x
Reference in New Issue
Block a user