1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-10-01 03:36:26 -04:00

Merge with git+ssh://pasky.or.cz/srv/git/elinks.git

This commit is contained in:
Miciah Dashiel Butler Masters 2005-12-29 00:57:46 +00:00 committed by Miciah Dashiel Butler Masters
commit 006445cd09
42 changed files with 1459 additions and 280 deletions

View File

@ -7,6 +7,7 @@ SUBDIRS-$(CONFIG_FORMHIST) += formhist
SUBDIRS-$(CONFIG_GLOBHIST) += globhist SUBDIRS-$(CONFIG_GLOBHIST) += globhist
SUBDIRS-$(CONFIG_ECMASCRIPT) += ecmascript SUBDIRS-$(CONFIG_ECMASCRIPT) += ecmascript
SUBDIRS-$(CONFIG_SCRIPTING) += scripting SUBDIRS-$(CONFIG_SCRIPTING) += scripting
SUBDIRS-$(CONFIG_DOM) += dom
SUBDIRS = \ SUBDIRS = \
bfu \ bfu \

View File

@ -2,8 +2,19 @@ top_builddir=../..
include $(top_builddir)/Makefile.config include $(top_builddir)/Makefile.config
OBJS = cookies.o dialogs.o parser.o OBJS = cookies.o dialogs.o parser.o
PROG = parsetst
parsetst: parser.o parsetst.o PARSETSTDEPS = \
$(call cmd,link) -L../util/libutil.a $(top_builddir)/src/util/error.o \
$(top_builddir)/src/util/string.o
ifdef CONFIG_DEBUG
PARSETSTDEPS += $(top_builddir)/src/util/memdebug.o
endif
parsetst: $(PARSETSTDEPS) parser.o parsetst.o
$(call cmd,link)
CLEAN += parsetst.o
include $(top_srcdir)/Makefile.lib include $(top_srcdir)/Makefile.lib

View File

@ -2,7 +2,7 @@ top_builddir=../..
include $(top_builddir)/Makefile.config include $(top_builddir)/Makefile.config
SUBDIRS-$(CONFIG_CSS) += css SUBDIRS-$(CONFIG_CSS) += css
SUBDIRS-$(CONFIG_DOM) += dom sgml SUBDIRS-$(CONFIG_DOM) += dom
SUBDIRS = html plain SUBDIRS = html plain

View File

@ -1,6 +1,6 @@
top_builddir=../../.. top_builddir=../../..
include $(top_builddir)/Makefile.config include $(top_builddir)/Makefile.config
OBJS = node.o renderer.o select.o stack.o OBJS = renderer.o
include $(top_srcdir)/Makefile.lib include $(top_srcdir)/Makefile.lib

View File

@ -20,11 +20,12 @@
#include "document/css/stylesheet.h" #include "document/css/stylesheet.h"
#include "document/docdata.h" #include "document/docdata.h"
#include "document/document.h" #include "document/document.h"
#include "document/dom/node.h"
#include "document/dom/renderer.h" #include "document/dom/renderer.h"
#include "document/dom/stack.h"
#include "document/renderer.h" #include "document/renderer.h"
#include "document/sgml/parser.h" #include "dom/scanner.h"
#include "dom/sgml/parser.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "intl/charsets.h" #include "intl/charsets.h"
#include "globhist/globhist.h" /* get_global_history_item() */ #include "globhist/globhist.h" /* get_global_history_item() */
#include "protocol/uri.h" #include "protocol/uri.h"
@ -32,7 +33,6 @@
#include "util/box.h" #include "util/box.h"
#include "util/error.h" #include "util/error.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/scanner.h"
#include "util/snprintf.h" #include "util/snprintf.h"
#include "util/string.h" #include "util/string.h"
@ -506,9 +506,9 @@ render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, vo
struct dom_renderer *renderer = stack->current->data; struct dom_renderer *renderer = stack->current->data;
struct dom_stack_state *state = get_dom_stack_top(stack); struct dom_stack_state *state = get_dom_stack_top(stack);
struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state); struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
struct scanner_token *token = &pstate->end_token; struct dom_scanner_token *token = &pstate->end_token;
unsigned char *string = token->string; unsigned char *string = token->string.string;
int length = token->length; int length = token->string.length;
assert(node && renderer && renderer->document); assert(node && renderer && renderer->document);
@ -670,6 +670,10 @@ render_dom_document(struct cache_entry *cached, struct document *document,
struct conv_table *convert_table; struct conv_table *convert_table;
struct sgml_parser *parser; struct sgml_parser *parser;
enum sgml_document_type doctype; enum sgml_document_type doctype;
unsigned char *string = struri(cached->uri);
size_t length = strlen(string);
struct dom_string uri = INIT_DOM_STRING(string, length);
struct dom_string source = INIT_DOM_STRING(buffer->source, buffer->length);
assert(document->options.plain); assert(document->options.plain);
@ -689,14 +693,14 @@ render_dom_document(struct cache_entry *cached, struct document *document,
else else
doctype = SGML_DOCTYPE_HTML; doctype = SGML_DOCTYPE_HTML;
parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, cached->uri); parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri);
if (!parser) return; if (!parser) return;
add_dom_stack_context(&parser->stack, &renderer, add_dom_stack_context(&parser->stack, &renderer,
&dom_source_renderer_context_info); &dom_source_renderer_context_info);
add_dom_stack_tracer(&parser->stack); add_dom_stack_tracer(&parser->stack);
root = parse_sgml(parser, buffer); root = parse_sgml(parser, &source);
if (root) { if (root) {
assert(parser->stack.depth == 1); assert(parser->stack.depth == 1);

View File

@ -1,2 +0,0 @@
:set runtimepath+=.
:runtime ../../../.vimrc

View File

@ -1,2 +0,0 @@
:set runtimepath+=.
:runtime ../../../.vimrc

9
src/dom/Makefile Normal file
View File

@ -0,0 +1,9 @@
top_builddir=../..
include $(top_builddir)/Makefile.config
SUBDIRS = css sgml
OBJS = node.o select.o stack.o scanner.o
SUBDIRS-$(CONFIG_DEBUG) += test
include $(top_srcdir)/Makefile.lib

6
src/dom/css/Makefile Normal file
View File

@ -0,0 +1,6 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
OBJS = scanner.o
include $(top_srcdir)/Makefile.lib

388
src/dom/css/scanner.c Normal file
View File

@ -0,0 +1,388 @@
/* CSS token scanner utilities */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "dom/css/scanner.h"
#include "dom/scanner.h"
#include "dom/string.h"
#include "util/error.h"
/* Character classes known to the CSS scanner. Every class owns exactly one
 * bit so a single scan table slot can record all the classes a character
 * belongs to. */
enum css_char_group {
	CSS_CHAR_ALPHA		= 0x001,
	CSS_CHAR_DIGIT		= 0x002,
	CSS_CHAR_HEX_DIGIT	= 0x004,
	CSS_CHAR_IDENT		= 0x008,
	CSS_CHAR_IDENT_START	= 0x010,
	CSS_CHAR_NEWLINE	= 0x020,
	CSS_CHAR_NON_ASCII	= 0x040,
	CSS_CHAR_SGML_MARKUP	= 0x080,
	CSS_CHAR_TOKEN		= 0x100,
	CSS_CHAR_TOKEN_START	= 0x200,
	CSS_CHAR_WHITESPACE	= 0x400,
};
/* Recipe for filling the CSS scan table. Each entry ORs its class bits into
 * the table slots of either an inclusive character range (RANGE) or every
 * character of a string (STRING), so a character listed in several entries
 * ends up carrying the union of their bits. */
static const struct dom_scan_table_info css_scan_table_info[] = {
DOM_SCAN_TABLE_RANGE("0", '9', CSS_CHAR_DIGIT | CSS_CHAR_HEX_DIGIT | CSS_CHAR_IDENT),
DOM_SCAN_TABLE_RANGE("A", 'F', CSS_CHAR_HEX_DIGIT),
DOM_SCAN_TABLE_RANGE("A", 'Z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
DOM_SCAN_TABLE_RANGE("a", 'f', CSS_CHAR_HEX_DIGIT),
DOM_SCAN_TABLE_RANGE("a", 'z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* For the octal number impaired (me included): \241 is 161 --jonas */
DOM_SCAN_TABLE_RANGE("\241", 255, CSS_CHAR_NON_ASCII | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* The explicit \000 is part of the string (the length is sizeof - 1), so the
 * NUL character is classified as whitespace too. */
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v\000", CSS_CHAR_WHITESPACE),
DOM_SCAN_TABLE_STRING("\f\n\r", CSS_CHAR_NEWLINE),
DOM_SCAN_TABLE_STRING("-", CSS_CHAR_IDENT),
/* Characters that start a multi-char token handled case by case in
 * scan_css_token(). */
DOM_SCAN_TABLE_STRING(".#@!\"'<-/|^$*", CSS_CHAR_TOKEN_START),
/* Unicode escape (that we do not handle yet) + other special chars */
DOM_SCAN_TABLE_STRING("\\_", CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* This should contain the most frequently used char tokens like ':' and
 * maybe a few garbage chars that people might put in their CSS code. */
DOM_SCAN_TABLE_STRING("[({})];:,.>+~", CSS_CHAR_TOKEN),
/* Characters that may appear in "<!--", "-->", "<![CDATA[" and "]]>". */
DOM_SCAN_TABLE_STRING("<![CDATA]->", CSS_CHAR_SGML_MARKUP),
DOM_SCAN_TABLE_END,
};
/* Builds one mapping entry: the identifier @str is refined from the generic
 * token type CSS_TOKEN_<family> to the specific type CSS_TOKEN_<type>.
 * NOTE(review): the -1 length is interpreted by INIT_DOM_STRING(), which is
 * defined in dom/string.h and not visible here -- presumably it means
 * "derive the length from the literal"; confirm. */
#define CSS_STRING_MAP(str, type, family) \
{ INIT_DOM_STRING(str, -1), CSS_TOKEN_##type, CSS_TOKEN_##family }
/* Identifier -> token type table consulted by map_dom_scanner_string(). An
 * entry only applies when the caller asks for the entry's base type
 * (dimension, function or at-keyword). */
static const struct dom_scanner_string_mapping css_string_mappings[] = {
/* Units following a number */
CSS_STRING_MAP("Hz", FREQUENCY, DIMENSION),
CSS_STRING_MAP("cm", LENGTH, DIMENSION),
CSS_STRING_MAP("deg", ANGLE, DIMENSION),
CSS_STRING_MAP("em", EM, DIMENSION),
CSS_STRING_MAP("ex", EX, DIMENSION),
CSS_STRING_MAP("grad", ANGLE, DIMENSION),
CSS_STRING_MAP("in", LENGTH, DIMENSION),
CSS_STRING_MAP("kHz", FREQUENCY, DIMENSION),
CSS_STRING_MAP("mm", LENGTH, DIMENSION),
CSS_STRING_MAP("ms", TIME, DIMENSION),
CSS_STRING_MAP("pc", LENGTH, DIMENSION),
CSS_STRING_MAP("pt", LENGTH, DIMENSION),
CSS_STRING_MAP("px", LENGTH, DIMENSION),
CSS_STRING_MAP("rad", ANGLE, DIMENSION),
CSS_STRING_MAP("s", TIME, DIMENSION),
/* Functions that get special treatment by the scanner */
CSS_STRING_MAP("rgb", RGB, FUNCTION),
CSS_STRING_MAP("url", URL, FUNCTION),
/* Known @-rules */
CSS_STRING_MAP("charset", AT_CHARSET, AT_KEYWORD),
CSS_STRING_MAP("font-face", AT_FONT_FACE, AT_KEYWORD),
CSS_STRING_MAP("import", AT_IMPORT, AT_KEYWORD),
CSS_STRING_MAP("media", AT_MEDIA, AT_KEYWORD),
CSS_STRING_MAP("page", AT_PAGE, AT_KEYWORD),
DOM_STRING_MAP_END,
};
/* Forward declaration: the scanner info below needs the scan callback that
 * is defined at the end of this file. */
static struct dom_scanner_token *scan_css_tokens(struct dom_scanner *scanner);
/* The CSS scanner description handed to init_dom_scanner(). The initializer
 * is positional: string mappings, scan table recipe, scan callback. The
 * members that are not listed (the scan table itself and the initialized
 * flag) start out zeroed and are filled in by the first init_dom_scanner()
 * call that uses this info. */
struct dom_scanner_info dom_css_scanner_info = {
css_string_mappings,
css_scan_table_info,
scan_css_tokens,
};
/* Tests whether the character @c belongs to the character group(s) @bit. */
#define check_css_table(c, bit)	(dom_css_scanner_info.scan_table[(c)] & (bit))

/* Advances the pointer @s while it is inside the scanned string and points
 * at a character belonging to @bit. @s must be a modifiable lvalue. */
#define scan_css(scanner, s, bit) \
	do { \
		while ((s) < (scanner)->end && check_css_table(*(s), bit)) \
			(s)++; \
	} while (0)

/* Moves the pointer @s backwards while it has not passed the start of the
 * scanned string and points at a character belonging to @bit. */
#define scan_back_css(scanner, s, bit) \
	do { \
		while ((s) >= (scanner)->string && check_css_table(*(s), bit)) \
			(s)--; \
	} while (0)

#define is_css_ident_start(c)	check_css_table(c, CSS_CHAR_IDENT_START)
#define is_css_ident(c)		check_css_table(c, CSS_CHAR_IDENT)
#define is_css_digit(c)		check_css_table(c, CSS_CHAR_DIGIT)
#define is_css_hexdigit(c)	check_css_table(c, CSS_CHAR_HEX_DIGIT)
#define is_css_char_token(c)	check_css_table(c, CSS_CHAR_TOKEN)
#define is_css_token_start(c)	check_css_table(c, CSS_CHAR_TOKEN_START)

/* Advances @s until it points at the character @skipto, at a character with
 * a precedence that forbids skipping past it, or at the end of the scanned
 * string. Quoted strings are jumped over as a whole when their closing quote
 * can be found.
 *
 * The search for the closing quote starts one past the opening quote, so
 * only (end - s - 1) bytes may be inspected; searching (end - s) bytes would
 * read one byte beyond the scanned string. */
#define skip_css(scanner, s, skipto) \
	do { \
		while ((s) < (scanner)->end \
		       && *(s) != (skipto) \
		       && check_css_precedence(*(s), skipto)) { \
			if (isquote(*(s))) { \
				int size = (scanner)->end - (s) - 1; \
				unsigned char *end = memchr((s) + 1, *(s), size); \
				\
				if (end) (s) = end; \
			} \
			(s)++; \
		} \
	} while (0)
static inline void
scan_css_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum css_token_type type = CSS_TOKEN_GARBAGE;
int real_length = -1;
assert(first_char);
token->string.string = string++;
if (is_css_char_token(first_char)) {
type = first_char;
} else if (is_css_digit(first_char) || first_char == '.') {
scan_css(scanner, string, CSS_CHAR_DIGIT);
/* First scan the full number token */
if (*string == '.') {
string++;
if (is_css_digit(*string)) {
type = CSS_TOKEN_NUMBER;
scan_css(scanner, string, CSS_CHAR_DIGIT);
}
}
/* Check what kind of number we have */
if (*string == '%') {
if (first_char != '.')
type = CSS_TOKEN_PERCENTAGE;
string++;
} else if (!is_css_ident_start(*string)) {
type = CSS_TOKEN_NUMBER;
} else {
unsigned char *ident = string;
scan_css(scanner, string, CSS_CHAR_IDENT);
type = map_dom_scanner_string(scanner, ident, string,
CSS_TOKEN_DIMENSION);
}
} else if (is_css_ident_start(first_char)) {
scan_css(scanner, string, CSS_CHAR_IDENT);
if (*string == '(') {
unsigned char *function_end = string + 1;
/* Make sure that we have an ending ')' */
skip_css(scanner, function_end, ')');
if (*function_end == ')') {
type = map_dom_scanner_string(scanner, token->string.string,
string, CSS_TOKEN_FUNCTION);
/* If it is not a known function just skip the
* how arg stuff so we don't end up generating
* a lot of useless tokens. */
if (type == CSS_TOKEN_FUNCTION) {
string = function_end;
} else if (type == CSS_TOKEN_URL) {
/* Extracting the URL first removes any
* leading or ending whitespace and
* then see if the url is given in a
* string. If that is the case the
* string delimiters are also trimmed.
* This is not totally correct because
* we should of course handle escape
* sequences .. but that will have to
* be fixed later. */
unsigned char *from = string + 1;
unsigned char *to = function_end - 1;
scan_css(scanner, from, CSS_CHAR_WHITESPACE);
scan_back_css(scanner, to, CSS_CHAR_WHITESPACE);
if (isquote(*from)) from++;
if (isquote(*to)) to--;
token->string.string = from;
real_length = to - from + 1;
assert(real_length >= 0);
string = function_end;
}
assert(type != CSS_TOKEN_RGB || *string == '(');
assert(type != CSS_TOKEN_URL || *string == ')');
assert(type != CSS_TOKEN_FUNCTION || *string == ')');
}
string++;
} else {
type = CSS_TOKEN_IDENT;
}
} else if (!is_css_token_start(first_char)) {
/* TODO: Better composing of error tokens. For now we just
* split them down into char tokens */
} else if (first_char == '#') {
/* Check whether it is hexcolor or hash token */
if (is_css_hexdigit(*string)) {
int hexdigits;
scan_css(scanner, string, CSS_CHAR_HEX_DIGIT);
/* Check that the hexdigit sequence is either 3 or 6
* chars and it isn't just start of some non-hex ident
* string. */
hexdigits = string - token->string.string - 1;
if ((hexdigits == 3 || hexdigits == 6)
&& !is_css_ident(*string)) {
type = CSS_TOKEN_HEX_COLOR;
} else {
scan_css(scanner, string, CSS_CHAR_IDENT);
type = CSS_TOKEN_HASH;
}
} else if (is_css_ident(*string)) {
/* Not *_ident_start() because hashes are #<name>. */
scan_css(scanner, string, CSS_CHAR_IDENT);
type = CSS_TOKEN_HASH;
}
} else if (first_char == '@') {
/* Compose token containing @<ident> */
if (is_css_ident_start(*string)) {
unsigned char *ident = string;
/* Scan both ident start and ident */
scan_css(scanner, string, CSS_CHAR_IDENT);
type = map_dom_scanner_string(scanner, ident, string,
CSS_TOKEN_AT_KEYWORD);
}
} else if (first_char == '*') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_CONTAINS;
string++;
} else {
type = CSS_TOKEN_IDENT;
}
} else if (first_char == '^') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_BEGIN;
string++;
}
} else if (first_char == '$') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_END;
string++;
}
} else if (first_char == '|') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_HYPHEN_LIST;
string++;
}
} else if (first_char == '!') {
scan_css(scanner, string, CSS_CHAR_WHITESPACE);
if (!strncasecmp(string, "important", 9)) {
type = CSS_TOKEN_IMPORTANT;
string += 9;
}
} else if (isquote(first_char)) {
/* TODO: Escaped delimiters --jonas */
int size = scanner->end - string;
unsigned char *string_end = memchr(string, first_char, size);
if (string_end) {
/* We don't want the delimiters in the token */
token->string.string++;
real_length = string_end - token->string.string;
string = string_end + 1;
type = CSS_TOKEN_STRING;
}
} else if (first_char == '<' || first_char == '-') {
/* Try to navigate SGML tagsoup */
if (*string == '/') {
/* Some kind of SGML tag end ... better bail out screaming */
type = CSS_TOKEN_NONE;
} else {
unsigned char *sgml = string;
/* Skip anything looking like SGML "<!--" and "-->"
* comments + <![CDATA[ and ]]> notations. */
scan_css(scanner, sgml, CSS_CHAR_SGML_MARKUP);
if (sgml - string >= 2
&& ((first_char == '<' && *string == '!')
|| (first_char == '-' && sgml[-1] == '>'))) {
type = CSS_TOKEN_SKIP;
string = sgml;
}
}
} else if (first_char == '/') {
/* Comments */
if (*string == '*') {
type = CSS_TOKEN_SKIP;
for (string++; string < scanner->end; string++)
if (*string == '*' && string[1] == '/') {
string += 2;
break;
}
}
} else {
INTERNAL("Someone forgot to put code for recognizing tokens "
"which start with '%c'.", first_char);
}
token->type = type;
token->string.length = real_length > 0 ? real_length : string - token->string.string;
token->precedence = get_css_precedence(type);
scanner->position = string;
}
/* The scan callback of the CSS scanner: appends freshly scanned tokens to
 * the scanner's token table until the table is full or the scanned string is
 * exhausted. Tokens of type CSS_TOKEN_SKIP are dropped and a token of type
 * CSS_TOKEN_NONE stops the scanning for good. */
static struct dom_scanner_token *
scan_css_tokens(struct dom_scanner *scanner)
{
	struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
	struct dom_scanner_token *current;

	if (!begin_dom_token_scanning(scanner))
		return get_dom_scanner_token(scanner);

	/* Keep filling slots while there is both room and input left */
	current = scanner->table + scanner->tokens;
	while (current < table_end && scanner->position < scanner->end) {
		scan_css(scanner, scanner->position, CSS_CHAR_WHITESPACE);
		if (scanner->position >= scanner->end)
			break;

		scan_css_token(scanner, current);

		if (current->type == CSS_TOKEN_NONE) {
			/* The token scanner gave up: forget the bogus slot
			 * and make sure nothing more is scanned. */
			scanner->position = NULL;
			current--;
			break;
		}

		/* A skipped token leaves its slot free for the next one */
		if (current->type != CSS_TOKEN_SKIP)
			current++;
	}

	return end_dom_token_scanning(scanner, current);
}

112
src/dom/css/scanner.h Normal file
View File

@ -0,0 +1,112 @@
#ifndef EL__DOM_CSS_SCANNER_H
#define EL__DOM_CSS_SCANNER_H
#include "dom/scanner.h"
/* The token types produced by the CSS scanner. The patterns quoted in the
 * comments are taken from the flex scanner declarations in the CSS 2
 * Specification.
 *
 * Char tokens use their char value (1 to 255) as type, so every type listed
 * here apart from the zero sentinel starts at 256.
 *
 * Notation: {...} means char group, <...> means token.
 *
 *	{identstart}	[a-z_]|{nonascii}
 *	{ident}		[a-z0-9_-]|{nonascii}
 *	<ident>		{identstart}{ident}*
 *	<name>		{ident}+
 *	<number>	[0-9]+|[0-9]*"."[0-9]+ */
enum css_token_type {
	/* Internal type used both to mark unused tokens in the scanner table
	 * as invalid and, while scanning, to signal that the scanning should
	 * end. */
	CSS_TOKEN_NONE = 0,

	/* Low level string tokens: */

	CSS_TOKEN_IDENT = 256,		/* <ident> */
	CSS_TOKEN_NUMBER,		/* <number> */
	/* Percentage has its own type because, although it looks like it is
	 * composed of <number> and '%', floating point numbers are really not
	 * allowed; strtol() will round it down for us ;) */
	CSS_TOKEN_PERCENTAGE,		/* <number>% */
	CSS_TOKEN_STRING,		/* Char sequence delimited by matching ' or " */

	/* High level string tokens: */

	/* The various number values; dimension being the most generic */
	CSS_TOKEN_ANGLE,		/* <number>rad, <number>grad or <number>deg */
	CSS_TOKEN_DIMENSION,		/* <number><ident> */
	CSS_TOKEN_EM,			/* <number>em */
	CSS_TOKEN_EX,			/* <number>ex */
	CSS_TOKEN_FREQUENCY,		/* <number>Hz or <number>kHz */
	CSS_TOKEN_LENGTH,		/* <number>{px,cm,mm,in,pt,pc} */
	CSS_TOKEN_TIME,			/* <number>ms or <number>s */

	/* XXX: CSS_TOKEN_HASH conflicts with CSS_TOKEN_HEX_COLOR. Generating
	 * hex color tokens has precedence, so users of the hash token have to
	 * treat CSS_TOKEN_HASH and CSS_TOKEN_HEX_COLOR alike. */
	CSS_TOKEN_HASH,			/* #<name> */
	CSS_TOKEN_HEX_COLOR,		/* #[0-9a-f]\{3,6} */

	/* For all unknown functions we generate one token containing both the
	 * function name and the args so scanning/parsing is easier. Besides,
	 * we already check for the ending ')'.
	 *
	 * For known functions where we need several args [like rgb()] we want
	 * to generate tokens for every arg and arg delimiter ( ',' or ')' ).
	 * url() is a bit tricky: it can contain both <string> and some chars
	 * that would otherwise probably make the scanner choke, so the arg is
	 * included in that token too. Besides, it makes things like
	 * 'background' property parsing easier. */
	CSS_TOKEN_FUNCTION,		/* <ident>(<args>) */
	CSS_TOKEN_RGB,			/* rgb( */
	CSS_TOKEN_URL,			/* url(<arg>) */

	/* @-rule symbols */
	CSS_TOKEN_AT_KEYWORD,		/* @<ident> */
	CSS_TOKEN_AT_CHARSET,		/* @charset */
	CSS_TOKEN_AT_FONT_FACE,		/* @font-face */
	CSS_TOKEN_AT_IMPORT,		/* @import */
	CSS_TOKEN_AT_MEDIA,		/* @media */
	CSS_TOKEN_AT_PAGE,		/* @page */

	CSS_TOKEN_IMPORTANT,		/* !<whitespace>important */

	/* TODO: Selector stuff: */
	CSS_TOKEN_SELECT_SPACE_LIST,	/* ~= */
	CSS_TOKEN_SELECT_HYPHEN_LIST,	/* |= */
	CSS_TOKEN_SELECT_BEGIN,		/* ^= */
	CSS_TOKEN_SELECT_END,		/* $= */
	CSS_TOKEN_SELECT_CONTAINS,	/* *= */

	/* Special tokens: */

	/* A special token for unrecognized strings */
	CSS_TOKEN_GARBAGE,

	/* Token type used internally when scanning to signal that the token
	 * should not be recorded in the scanner's token table. */
	CSS_TOKEN_SKIP,
};
extern struct dom_scanner_info dom_css_scanner_info;
/* Maps a token type to its skipping precedence. Only the block and
 * declaration delimiters have one; every other type yields zero. */
#define get_css_precedence(token_type) \
	((token_type) == '}' ? 0x400 : \
	 (token_type) == '{' ? 0x200 : \
	 (token_type) == ';' ? 0x100 : \
	 (token_type) == ')' ? 0x080 : 0)

/* Skips scanner tokens up to @type using the precedence of @type itself. */
#define skip_css_tokens(scanner, type) \
	skip_dom_scanner_tokens(scanner, type, get_css_precedence(type))

/* Tells whether a token of type @type may be skipped while looking for
 * @skipto, i.e. whether its precedence is strictly lower. */
static inline int
check_css_precedence(int type, int skipto)
{
	int type_precedence = get_css_precedence(type);
	int skipto_precedence = get_css_precedence(skipto);

	return type_precedence < skipto_precedence;
}
#endif

View File

@ -1,5 +1,5 @@
#ifndef EL__DOCUMENT_DOM_DOM_H #ifndef EL_DOM_DOM_H
#define EL__DOCUMENT_DOM_DOM_H #define EL_DOM_DOM_H
enum dom_exception_code { enum dom_exception_code {
DOM_ERR_NONE = 0, DOM_ERR_NONE = 0,

View File

@ -9,12 +9,10 @@
#include "elinks.h" #include "elinks.h"
#include "document/dom/node.h" #include "dom/node.h"
#include "intl/charsets.h" #include "dom/string.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/lists.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/string.h"
static void done_dom_node_data(struct dom_node *node); static void done_dom_node_data(struct dom_node *node);
@ -249,7 +247,7 @@ get_dom_node_list_index(struct dom_node *parent, struct dom_node *node)
struct dom_node * struct dom_node *
init_dom_node_(unsigned char *file, int line, init_dom_node_(unsigned char *file, int line,
struct dom_node *parent, enum dom_node_type type, struct dom_node *parent, enum dom_node_type type,
unsigned char *string, size_t length) struct dom_string *string)
{ {
#ifdef DEBUG_MEMLEAK #ifdef DEBUG_MEMLEAK
struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node)); struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node));
@ -261,7 +259,7 @@ init_dom_node_(unsigned char *file, int line,
node->type = type; node->type = type;
node->parent = parent; node->parent = parent;
set_dom_string(&node->string, string, length); copy_dom_string(&node->string, string);
if (parent) { if (parent) {
struct dom_node_list **list = get_dom_node_list(parent, node); struct dom_node_list **list = get_dom_node_list(parent, node);

View File

@ -1,8 +1,8 @@
#ifndef EL__DOCUMENT_DOM_NODE_H #ifndef EL_DOM_NODE_H
#define EL__DOCUMENT_DOM_NODE_H #define EL_DOM_NODE_H
#include "document/dom/string.h" #include "dom/string.h"
#include "util/hash.h" #include "util/hash.h"
struct dom_node_list; struct dom_node_list;
@ -255,34 +255,34 @@ get_dom_node_map_entry(struct dom_node_list *node_map,
struct dom_node * struct dom_node *
init_dom_node_(unsigned char *file, int line, init_dom_node_(unsigned char *file, int line,
struct dom_node *parent, enum dom_node_type type, struct dom_node *parent, enum dom_node_type type,
unsigned char *string, size_t length); struct dom_string *string);
#define init_dom_node(type, string, length) init_dom_node_(__FILE__, __LINE__, NULL, type, string, length) #define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string)
#define add_dom_node(parent, type, string, length) init_dom_node_(__FILE__, __LINE__, parent, type, string, length) #define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string)
#define add_dom_element(parent, string, length) \ #define add_dom_element(parent, string) \
add_dom_node(parent, DOM_NODE_ELEMENT, string, length) add_dom_node(parent, DOM_NODE_ELEMENT, string)
static inline struct dom_node * static inline struct dom_node *
add_dom_attribute(struct dom_node *parent, unsigned char *string, int length, add_dom_attribute(struct dom_node *parent, struct dom_string *name,
unsigned char *value, size_t valuelen) struct dom_string *value)
{ {
struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, string, length); struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
if (node && value) { if (node && value) {
set_dom_string(&node->data.attribute.value, value, valuelen); copy_dom_string(&node->data.attribute.value, value);
} }
return node; return node;
} }
static inline struct dom_node * static inline struct dom_node *
add_dom_proc_instruction(struct dom_node *parent, unsigned char *string, int length, add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
unsigned char *instruction, size_t instructionlen) struct dom_string *instruction)
{ {
struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string, length); struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
if (node && instruction) { if (node && instruction) {
set_dom_string(&node->data.proc_instruction.instruction, instruction, instructionlen); copy_dom_string(&node->data.proc_instruction.instruction, instruction);
} }
return node; return node;

172
src/dom/scanner.c Normal file
View File

@ -0,0 +1,172 @@
/* A pretty generic scanner */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "dom/scanner.h"
#include "dom/string.h"
#include "util/error.h"
/* Looks up the identifier delimited by @ident and @end in the string
 * mappings of the scanner. Only mappings registered for @base_type are
 * considered and names are compared with dom_string_casecmp(). Returns the
 * mapped token type, or @base_type itself when no mapping matches. */
int
map_dom_scanner_string(struct dom_scanner *scanner,
		       unsigned char *ident, unsigned char *end, int base_type)
{
	struct dom_string name = INIT_DOM_STRING(ident, end - ident);
	const struct dom_scanner_string_mapping *entry;

	/* The table is terminated by an entry with an unset name */
	for (entry = scanner->info->mappings;
	     is_dom_string_set(&entry->name);
	     entry++) {
		if (entry->base_type != base_type)
			continue;

		if (dom_string_casecmp(&entry->name, &name) == 0)
			return entry->type;
	}

	return base_type;
}
/* Skips tokens until one of type @skipto is found and returns the token
 * following it. The skipping gives up, returning NULL, when the tokens run
 * out or when a token with a precedence higher than @precedence is met
 * first, so that we do not skip too far. */
struct dom_scanner_token *
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence)
{
	struct dom_scanner_token *token;

	for (token = get_dom_scanner_token(scanner);
	     token;
	     token = get_next_dom_scanner_token(scanner)) {
		/* Found it: hand out whatever comes after it */
		if (token->type == skipto)
			return get_next_dom_scanner_token(scanner);

		/* Something more important is in the way */
		if (token->precedence > precedence)
			return NULL;
	}

	return NULL;
}
#ifdef DEBUG_SCANNER
/* Emits a debug message describing the scanner state: the strings of up to
 * four tokens starting at the current one followed by an excerpt of up to
 * fifty source chars starting at the current token, with newlines, carriage
 * returns and tabs shown as escape sequences. Does nothing when the scanner
 * has no tokens.
 *
 * Token strings are length delimited, so everything is bounded by the token
 * length or by scanner->end, never by a terminating NUL, and every write is
 * checked against the size of the local buffer. */
void
dump_dom_scanner(struct dom_scanner *scanner)
{
	unsigned char buffer[MAX_STR_LEN];
	unsigned char *bufpos = buffer;
	unsigned char *bufend = buffer + sizeof(buffer);
	struct dom_scanner_token *token, *table_end;
	unsigned char *srcpos;
	int token_lookahead = 4;
	int src_lookahead = 50;
	int srclen, room;

	/* The current token is only valid when there are tokens, so check
	 * before touching it. */
	if (!dom_scanner_has_tokens(scanner)) return;

	token = scanner->current;
	table_end = scanner->table + scanner->tokens;
	srcpos = token->string.string;

	memset(buffer, 0, sizeof(buffer));

	for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
		int buflen = bufend - bufpos;
		int added = snprintf(bufpos, buflen, "[%.*s] ",
				     (int) token->string.length,
				     token->string.string);

		if (added < 0) break;

		if (added >= buflen) {
			/* The output was truncated; snprintf() reports the
			 * size it wanted, not the size it wrote. */
			bufpos = bufend - 1;
			break;
		}

		bufpos += added;
	}

	/* Tell if there are tokens which were not shown */
	if (token < table_end && bufend - bufpos > 4) {
		memcpy(bufpos, "... ", 4);
		bufpos += 4;
	}

	srclen = scanner->end - srcpos;
	if (src_lookahead > srclen)
		src_lookahead = srclen;

	/* The excerpt needs '[', up to two bytes per source char and finally
	 * "...]" plus the terminating NUL. */
	room = bufend - bufpos;
	if (room >= 6) {
		int shown;

		if (src_lookahead > (room - 6) / 2)
			src_lookahead = (room - 6) / 2;

		*bufpos++ = '[';

		/* Compress the lookahead string */
		for (shown = 0; shown < src_lookahead; shown++, srcpos++, bufpos++) {
			if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') {
				*bufpos++ = '\\';
				*bufpos = *srcpos == '\n' ? 'n'
					: (*srcpos == '\r' ? 'r' : 't');
			} else {
				*bufpos = *srcpos;
			}
		}

		if (srclen > src_lookahead)
			memcpy(bufpos, "...]", 5);
		else
			memcpy(bufpos, "]", 2);
	}

	errfile = scanner->file, errline = scanner->line;
	elinks_wdebug("%s", buffer);
}
/* Debugging variant of get_dom_scanner_token(): dumps the scanner state
 * before handing out the current token. Returns NULL when the scanner has no
 * tokens. */
struct dom_scanner_token *
get_dom_scanner_token_debug(struct dom_scanner *scanner)
{
	if (dom_scanner_has_tokens(scanner)) {
		dump_dom_scanner(scanner);

		/* Make sure we do not return invalid tokens */
		assert(!dom_scanner_has_tokens(scanner)
		       || scanner->current->type != 0);

		return get_dom_scanner_token(scanner);
	}

	return NULL;
}
#endif
/* Initializers */
/* Fills in the scan table of @scanner_info from its scan table recipe. A
 * range entry marks every char from the first char of its data string up to
 * and including the char code stored as the data length; a string entry
 * marks every char of its data string. Bits are ORed in, so entries may
 * overlap. Nothing is done when the info carries no recipe. */
static inline void
init_dom_scanner_info(struct dom_scanner_info *scanner_info)
{
	const struct dom_scan_table_info *info = scanner_info->scan_table_info;
	int *scan_table = scanner_info->scan_table;
	int i;

	if (!info) return;

	for (i = 0; info[i].type != DOM_SCAN_END; i++) {
		const struct dom_string *data = &info[i].data;
		const int bits = info[i].bits;

		if (info[i].type != DOM_SCAN_RANGE) {
			unsigned char *string = data->string;
			int pos = data->length - 1;

			assert(info[i].type == DOM_SCAN_STRING && pos >= 0);

			while (pos >= 0) {
				scan_table[string[pos]] |= bits;
				pos--;
			}

		} else {
			/* The range runs from the char in the string to the
			 * char code kept in the length member. */
			int index = *data->string;

			assert(index > 0);
			assert(data->length < DOM_SCAN_TABLE_SIZE);
			assert(index <= data->length);

			while (index <= data->length) {
				scan_table[index] |= bits;
				index++;
			}
		}
	}
}
/* Prepares @scanner for scanning @string using the rules in @scanner_info
 * and scans the first batch of tokens. The scan table of @scanner_info is
 * set up the first time the info is used. */
void
init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
		 struct dom_string *string)
{
	unsigned char *start = string->string;

	/* The scan table is shared, so it is only built once per info */
	if (!scanner_info->initialized) {
		init_dom_scanner_info(scanner_info);
		scanner_info->initialized = 1;
	}

	memset(scanner, 0, sizeof(*scanner));

	scanner->info = scanner_info;
	scanner->current = scanner->table;
	scanner->string = start;
	scanner->position = start;
	scanner->end = start + string->length;

	scanner_info->scan(scanner);
}

248
src/dom/scanner.h Normal file
View File

@ -0,0 +1,248 @@
#ifndef EL_DOM_SCANNER_H
#define EL_DOM_SCANNER_H
#include "dom/string.h"
#include "util/error.h"
/* Define if you want a talking scanner */
/* #define DEBUG_SCANNER */
/* The {struct dom_scanner_token} describes one scanner state. There are two
 * kinds of tokens: char and non-char tokens. Char tokens contain only one
 * char and simply have their char value as type. They are tokens having
 * special control meaning in the code, like ':', ';', '{', '}' and '*'. Non
 * char tokens have one or more chars and contain stuff like number or
 * identifier strings. */
struct dom_scanner_token {
/* The type of the token */
int type;
/* The precedence value consulted when skipping tokens: skipping stops at a
 * token whose precedence is higher than the one being skipped with. */
int precedence;
/* The start of the token string and the token length. The string points
 * into the scanned source and is delimited by the length only. */
struct dom_string string;
};
/* Drops the first char of the string of @token by moving the start one char
 * forward and shortening the length accordingly. */
#define skip_dom_scanner_token_char(token) \
	do { \
		(token)->string.string += 1; \
		(token)->string.length -= 1; \
	} while (0)

/* Compares the string of @token with the "static" string in @str ignoring
 * case. @str must be a string literal or a char array because its length is
 * taken with sizeof(). */
#define dom_scanner_token_contains(token, str) \
	((sizeof(str) - 1) == (token)->string.length \
	 && strncasecmp((token)->string.string, str, sizeof(str) - 1) == 0)
/* One entry of the recipe used to fill in a scanner's scan table. */
struct dom_scan_table_info {
/* How to interpret @data; DOM_SCAN_END terminates the recipe. */
enum { DOM_SCAN_RANGE, DOM_SCAN_STRING, DOM_SCAN_END } type;
/* For DOM_SCAN_STRING the chars to mark. For DOM_SCAN_RANGE the first char
 * of the string is the start of the range and the length member holds the
 * char code of the (inclusive) end of the range. */
struct dom_string data;
/* The bits ORed into the scan table slot of every affected char. */
int bits;
};
/* The number of slots in a scan table: one for every unsigned char value. */
#define DOM_SCAN_TABLE_SIZE 256
/* Generic initializer for a recipe entry. */
#define DOM_SCAN_TABLE_INFO(type, data1, data2, bits) \
{ (type), INIT_DOM_STRING((data1), (data2)), (bits) }
/* Marks all chars from the first char of the string @from up to and
 * including the char @to. */
#define DOM_SCAN_TABLE_RANGE(from, to, bits) \
DOM_SCAN_TABLE_INFO(DOM_SCAN_RANGE, from, to, bits)
/* Marks every char of the string literal @str, embedded NULs included. */
#define DOM_SCAN_TABLE_STRING(str, bits) \
DOM_SCAN_TABLE_INFO(DOM_SCAN_STRING, str, sizeof(str) - 1, bits)
/* Terminates a recipe. */
#define DOM_SCAN_TABLE_END \
DOM_SCAN_TABLE_INFO(DOM_SCAN_END, NULL, 0, 0)
/* Maps an identifier string to a specific token type. Used by
 * map_dom_scanner_string() to refine a generic token type. */
struct dom_scanner_string_mapping {
/* The identifier to look for */
struct dom_string name;
/* The token type the identifier is mapped to */
int type;
/* The generic token type the mapping applies to; mappings with another base
 * type than the requested one are ignored during lookup. */
int base_type;
};
/* NOTE(review): the meaning of the -1 length is decided by INIT_DOM_STRING()
 * in dom/string.h, which is not visible here -- presumably "derive the
 * length from the literal"; confirm. */
#define DOM_STRING_MAP(str, type, family) \
{ INIT_DOM_STRING(str, -1), (type), (family) }
/* Terminates a mapping table: lookups stop at the first unset name. */
#define DOM_STRING_MAP_END \
{ INIT_DOM_STRING(NULL, 0), 0, 0 }
struct dom_scanner;
/* The {struct dom_scanner_info} holds everything that is specific to one kind
 * of scanner (for example the CSS scanner) and shared between all scanner
 * instances of that kind. */
struct dom_scanner_info {
/* Table containing how to map strings to token types */
const struct dom_scanner_string_mapping *mappings;
/* Information for how to initialize the scanner table */
const struct dom_scan_table_info *scan_table_info;
/* Fills the scanner with tokens. Already scanned tokens which have not
 * been requested remain and are moved to the start of the scanner's
 * token table. */
/* Returns the current token or NULL if there are none. */
struct dom_scanner_token *(*scan)(struct dom_scanner *scanner);
/* The scanner table */
/* Contains bitmaps for the various character groups; indexed by unsigned
 * char value. Idea sync'ed from mozilla browser. */
int scan_table[DOM_SCAN_TABLE_SIZE];
/* Has the scanner info been initialized? Set by init_dom_scanner() once the
 * scan table has been filled in. */
unsigned int initialized:1;
};
/* Initializes the scanner. */
/* @scanner is reset and set up to scan @string using the rules in
 * @scanner_info, the scan table of @scanner_info is filled in on first use
 * and a first batch of tokens is scanned right away. */
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
struct dom_string *string);
/* The number of tokens in the scanner's token table:
 * At best it should be big enough to contain properties with space separated
 * values and function calls with up to 3 variables like rgb(). At worst it
 * should be no less than 2 in order to be able to peek at the next token in
 * the scanner. */
#define DOM_SCANNER_TOKENS 10
/* The {struct dom_scanner} describes the current state of the scanner. */
struct dom_scanner {
/* The very start of the scanned string, the position in the string
 * where to scan next and the end of the string. If position is NULL it
 * means that no more tokens can be retrieved from the string. */
unsigned char *string, *position, *end;
/* The current token and number of scanned tokens in the table.
 * If the number of scanned tokens is less than DOM_SCANNER_TOKENS it
 * is because there are no more tokens in the string. */
struct dom_scanner_token *current;
int tokens;
/* The 'meta' scanner information */
struct dom_scanner_info *info;
#ifdef DEBUG_SCANNER
/* Debug info about the caller. Used as the error location when the scanner
 * state is dumped. */
unsigned char *file;
int line;
#endif
/* Some state indicator only meaningful to the scanner internals */
int state;
/* The table contains already scanned tokens. It is maintained in
 * order to optimize the scanning a bit and make it possible to look
 * ahead at the next token. You should always use the accessors
 * (defined below) for getting tokens from the scanner. */
struct dom_scanner_token table[DOM_SCANNER_TOKENS];
};
/* True when the token table holds at least one token and the current token
 * pointer has not yet moved past the last scanned token. */
#define dom_scanner_has_tokens(scanner) \
	(((scanner)->tokens > 0) \
	 && ((scanner)->current < ((scanner)->table + (scanner)->tokens)))
/* Checks whether the current scanner state is valid: either the scanner's
 * token table is not full, or the last token skip or
 * get_next_dom_scanner_token() call left at least one token after the current
 * one, making it possible to get the type of the next token.
 * The argument is parenthesized so that any pointer expression (for example
 * `&array[i]' or `base + 1') can be passed safely. */
#define check_dom_scanner(scanner) \
	((scanner)->tokens < DOM_SCANNER_TOKENS \
	 || (scanner)->current + 1 < (scanner)->table + (scanner)->tokens)
/* Scanner table accessors and mutators */
/* Checks the type of the next token: true only if the scanner has tokens,
 * there is a token after the current one in the table, and that token's type
 * equals @token_type. Uses dom_scanner_has_tokens() so that this header does
 * not depend on the scanner_has_tokens() macro from util/scanner.h. */
#define check_next_dom_scanner_token(scanner, token_type) \
	(dom_scanner_has_tokens(scanner) \
	 && ((scanner)->current + 1 < (scanner)->table + (scanner)->tokens) \
	 && (scanner)->current[1].type == (token_type))
/* Accessors for the current and the next token. Fetching the next token
 * might trigger a rescan, so any token pointers that have been stored in
 * local variables might no longer be valid after such a call. */
/* Returns the current token, or NULL when the scanner has no tokens. */
static inline struct dom_scanner_token *
get_dom_scanner_token(struct dom_scanner *scanner)
{
	if (dom_scanner_has_tokens(scanner))
		return scanner->current;

	return NULL;
}
/* Advances to the next token and returns it, or returns NULL without
 * advancing when the scanner has no tokens. If, after advancing, there is no
 * further token in the table behind the new current one, the scan() callback
 * is invoked to rescan and its result is returned instead. */
static inline struct dom_scanner_token *
get_next_dom_scanner_token(struct dom_scanner *scanner)
{
	if (!dom_scanner_has_tokens(scanner))
		return NULL;

	scanner->current++;

	/* No look-ahead token left in the table: rescan. */
	if (scanner->current + 1 >= scanner->table + scanner->tokens)
		return scanner->info->scan(scanner);

	return get_dom_scanner_token(scanner);
}
/* Alias for get_next_dom_scanner_token(), for call sites that only want to
 * move past the current token; this should just make the code more
 * understandable. The expression still evaluates to the next token. */
#define skip_dom_scanner_token(scanner) get_next_dom_scanner_token(scanner)
/* Removes tokens from the scanner until it meets a token of the type given
 * in @skipto. This token will then also be skipped.
 * NOTE(review): the role of @precedence and the exact return value are not
 * visible from this header -- confirm in the implementation. */
struct dom_scanner_token *
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence);
/* Looks up the string from @ident to @end in the scanner's string mapping
 * table.
 * NOTE(review): presumably returns the mapped token type and falls back to
 * @base_type when no mapping matches -- confirm in the implementation. */
int
map_dom_scanner_string(struct dom_scanner *scanner,
unsigned char *ident, unsigned char *end, int base_type);
/* Debugging aid, only declared when DEBUG_DOM_SCANNER is defined.
 * NOTE(review): presumably prints the scanner's token table -- confirm in
 * the implementation. */
#ifdef DEBUG_DOM_SCANNER
void dump_dom_scanner(struct dom_scanner *scanner);
#endif
/* begin_dom_token_scanning() and end_dom_token_scanning() provide the basic
 * setup and teardown for the rescan function that is made public through the
 * scan member of struct dom_scanner_info. */
/* Prepares the token table for a rescan: tokens from the current one up to
 * the last scanned one are moved to the front of the table, the remainder of
 * the table is zeroed and scanner->tokens is set to the number of tokens
 * kept. Returns the start of the table, or NULL if it is not necessary to
 * try to scan for more tokens (the scanner position is NULL); in that case
 * scanner->current is reset to the table start and scanner->tokens becomes
 * -1 when no tokens were kept. */
static inline struct dom_scanner_token *
begin_dom_token_scanning(struct dom_scanner *scanner)
{
	struct dom_scanner_token *first = scanner->table;
	struct dom_scanner_token *scanned_end = first + scanner->tokens;
	int pending = int_max(scanned_end - scanner->current, 0);
	struct dom_scanner_token *unused = first;
	size_t kept_bytes = 0;

	assert(scanner->current);

	/* Keep the tokens that have not been consumed yet. */
	if (pending) {
		kept_bytes = pending * sizeof(*first);
		memmove(first, scanner->current, kept_bytes);
		unused = first + pending;
	}

	/* Wipe every slot behind the kept tokens. */
	memset(unused, 0, sizeof(*first) * DOM_SCANNER_TOKENS - kept_bytes);

	if (scanner->position) {
		scanner->tokens = pending;
		return first;
	}

	/* Nothing more to scan: only the kept tokens (if any) remain. */
	scanner->tokens = pending ? pending : -1;
	scanner->current = first;
	assert(check_dom_scanner(scanner));

	return NULL;
}
/* Finishes a rescan by updating the @scanner struct. @end is the position
 * _after_ the last valid token in the table; it may be < scanner->table,
 * since scanner->tokens then simply ends up <= 0. The current token is
 * reset to the start of the table, and the scan position is set to NULL once
 * it has reached the end of the string. Returns the current token, or NULL
 * if there are none. */
static inline struct dom_scanner_token *
end_dom_token_scanning(struct dom_scanner *scanner, struct dom_scanner_token *end)
{
	assert(end <= scanner->table + DOM_SCANNER_TOKENS);

	scanner->current = scanner->table;
	scanner->tokens = (end - scanner->table);

	/* The whole string has been consumed; stop further scanning. */
	if (scanner->position >= scanner->end) {
		scanner->position = NULL;
	}

	assert(check_dom_scanner(scanner));

	if (dom_scanner_has_tokens(scanner))
		return scanner->current;

	return NULL;
}
#endif

View File

@ -6,21 +6,21 @@
#include "elinks.h" #include "elinks.h"
#include "document/css/scanner.h" #include "dom/css/scanner.h"
#include "document/dom/dom.h" #include "dom/dom.h"
#include "document/dom/node.h" #include "dom/node.h"
#include "document/dom/select.h" #include "dom/scanner.h"
#include "document/dom/stack.h" #include "dom/select.h"
#include "dom/stack.h"
#include "dom/string.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/scanner.h"
#include "util/string.h"
/* Selector parsing: */ /* Selector parsing: */
/* Maps the content of a scanner token to a pseudo-class or -element ID. */ /* Maps the content of a scanner token to a pseudo-class or -element ID. */
static enum dom_select_pseudo static enum dom_select_pseudo
get_dom_select_pseudo(struct scanner_token *token) get_dom_select_pseudo(struct dom_scanner_token *token)
{ {
static struct { static struct {
struct dom_string string; struct dom_string string;
@ -70,13 +70,10 @@ get_dom_select_pseudo(struct scanner_token *token)
#undef INIT_DOM_SELECT_PSEUDO_STRING #undef INIT_DOM_SELECT_PSEUDO_STRING
}; };
struct dom_string string;
int i; int i;
set_dom_string(&string, token->string, token->length);
for (i = 0; i < sizeof_array(pseudo_info); i++) for (i = 0; i < sizeof_array(pseudo_info); i++)
if (!dom_string_casecmp(&pseudo_info[i].string, &string)) if (!dom_string_casecmp(&pseudo_info[i].string, &token->string))
return pseudo_info[i].pseudo; return pseudo_info[i].pseudo;
return DOM_SELECT_PSEUDO_UNKNOWN; return DOM_SELECT_PSEUDO_UNKNOWN;
@ -84,9 +81,9 @@ get_dom_select_pseudo(struct scanner_token *token)
/* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */ /* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
static enum dom_exception_code static enum dom_exception_code
parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner) parse_dom_select_attribute(struct dom_select_node *sel, struct dom_scanner *scanner)
{ {
struct scanner_token *token = get_scanner_token(scanner); struct dom_scanner_token *token = get_dom_scanner_token(scanner);
/* Get '['. */ /* Get '['. */
@ -95,15 +92,15 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
/* Get the attribute name. */ /* Get the attribute name. */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token || token->type != CSS_TOKEN_IDENT) if (!token || token->type != CSS_TOKEN_IDENT)
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
set_dom_string(&sel->node.string, token->string, token->length); copy_dom_string(&sel->node.string, &token->string);
/* Get the optional '=' combo or ending ']'. */ /* Get the optional '=' combo or ending ']'. */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
switch (token->type) { switch (token->type) {
@ -137,13 +134,13 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
/* Get the required value. */ /* Get the required value. */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
switch (token->type) { switch (token->type) {
case CSS_TOKEN_IDENT: case CSS_TOKEN_IDENT:
case CSS_TOKEN_STRING: case CSS_TOKEN_STRING:
set_dom_string(&sel->node.data.attribute.value, token->string, token->length); copy_dom_string(&sel->node.data.attribute.value, &token->string);
break; break;
default: default:
@ -152,7 +149,7 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
/* Get the ending ']'. */ /* Get the ending ']'. */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (token && token->type == ']') if (token && token->type == ']')
return DOM_ERR_NONE; return DOM_ERR_NONE;
@ -170,13 +167,13 @@ parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
* 0n+0 * 0n+0
*/ */
/* FIXME: Move somewhere else? util/scanner.h? */ /* FIXME: Move somewhere else? dom/scanner.h? */
static size_t static size_t
get_scanner_token_number(struct scanner_token *token) get_scanner_token_number(struct dom_scanner_token *token)
{ {
size_t number = 0; size_t number = 0;
while (token->length > 0 && isdigit(token->string[0])) { while (token->string.length > 0 && isdigit(token->string.string[0])) {
size_t old_number = number; size_t old_number = number;
number *= 10; number *= 10;
@ -185,8 +182,8 @@ get_scanner_token_number(struct scanner_token *token)
if (old_number > number) if (old_number > number)
return -1; return -1;
number += token->string[0] - '0'; number += token->string.string[0] - '0';
token->string++, token->length--; skip_dom_scanner_token_char(token);
} }
return number; return number;
@ -194,26 +191,26 @@ get_scanner_token_number(struct scanner_token *token)
/* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */ /* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */
static enum dom_exception_code static enum dom_exception_code
parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scanner) parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct dom_scanner *scanner)
{ {
struct scanner_token *token = get_next_scanner_token(scanner); struct dom_scanner_token *token = get_next_dom_scanner_token(scanner);
int sign = 1; int sign = 1;
int number = -1; int number = -1;
if (!token || token->type != '(') if (!token || token->type != '(')
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) if (!token)
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
switch (token->type) { switch (token->type) {
case CSS_TOKEN_IDENT: case CSS_TOKEN_IDENT:
if (scanner_token_contains(token, "even")) { if (dom_scanner_token_contains(token, "even")) {
nth->step = 2; nth->step = 2;
nth->index = 0; nth->index = 0;
} else if (scanner_token_contains(token, "odd")) { } else if (dom_scanner_token_contains(token, "odd")) {
nth->step = 2; nth->step = 2;
nth->index = 1; nth->index = 1;
@ -230,7 +227,7 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
case '-': case '-':
sign = -1; sign = -1;
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
if (token->type != CSS_TOKEN_IDENT) if (token->type != CSS_TOKEN_IDENT)
@ -245,7 +242,7 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
if (number < 0) if (number < 0)
return DOM_ERR_INVALID_STATE; return DOM_ERR_INVALID_STATE;
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
break; break;
@ -256,18 +253,18 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
/* The rest can contain n+ part */ /* The rest can contain n+ part */
switch (token->type) { switch (token->type) {
case CSS_TOKEN_IDENT: case CSS_TOKEN_IDENT:
if (!scanner_token_contains(token, "n")) if (!dom_scanner_token_contains(token, "n"))
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
nth->step = sign * number; nth->step = sign * number;
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
if (token->type != '+') if (token->type != '+')
break; break;
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX; if (!token) return DOM_ERR_SYNTAX;
if (token->type != CSS_TOKEN_NUMBER) if (token->type != CSS_TOKEN_NUMBER)
@ -294,15 +291,15 @@ parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scann
/* Parse a pseudo-class or -element with the syntax: ':<ident>'. */ /* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
static enum dom_exception_code static enum dom_exception_code
parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel, parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
struct scanner *scanner) struct dom_scanner *scanner)
{ {
struct scanner_token *token = get_scanner_token(scanner); struct dom_scanner_token *token = get_dom_scanner_token(scanner);
enum dom_select_pseudo pseudo; enum dom_select_pseudo pseudo;
enum dom_exception_code code; enum dom_exception_code code;
/* Skip double :'s in front of some pseudo's (::first-line, etc.) */ /* Skip double :'s in front of some pseudo's (::first-line, etc.) */
do { do {
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
} while (token && token->type == ':'); } while (token && token->type == ':');
if (!token || token->type != CSS_TOKEN_IDENT) if (!token || token->type != CSS_TOKEN_IDENT)
@ -389,17 +386,17 @@ parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
/* Parse a CSS3 selector and add selector nodes to the @select struct. */ /* Parse a CSS3 selector and add selector nodes to the @select struct. */
static enum dom_exception_code static enum dom_exception_code
parse_dom_select(struct dom_select *select, struct dom_stack *stack, parse_dom_select(struct dom_select *select, struct dom_stack *stack,
unsigned char *string, int length) struct dom_string *string)
{ {
struct scanner scanner; struct dom_scanner scanner;
struct dom_select_node sel; struct dom_select_node sel;
init_scanner(&scanner, &css_scanner_info, string, string + length); init_dom_scanner(&scanner, &dom_css_scanner_info, string);
memset(&sel, 0, sizeof(sel)); memset(&sel, 0, sizeof(sel));
while (scanner_has_tokens(&scanner)) { while (dom_scanner_has_tokens(&scanner)) {
struct scanner_token *token = get_scanner_token(&scanner); struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
enum dom_exception_code code; enum dom_exception_code code;
struct dom_select_node *select_node; struct dom_select_node *select_node;
@ -416,8 +413,8 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
switch (token->type) { switch (token->type) {
case CSS_TOKEN_IDENT: case CSS_TOKEN_IDENT:
sel.node.type = DOM_NODE_ELEMENT; sel.node.type = DOM_NODE_ELEMENT;
set_dom_string(&sel.node.string, token->string, token->length); copy_dom_string(&sel.node.string, &token->string);
if (token->length == 1 && token->string[0] == '*') if (dom_scanner_token_contains(token, "*"))
sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL; sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
break; break;
@ -427,7 +424,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
sel.node.type = DOM_NODE_ATTRIBUTE; sel.node.type = DOM_NODE_ATTRIBUTE;
sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
/* Skip the leading '#'. */ /* Skip the leading '#'. */
token->string++, token->length--; skip_dom_scanner_token_char(token);
break; break;
case '[': case '[':
@ -438,14 +435,14 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
break; break;
case '.': case '.':
token = get_next_scanner_token(&scanner); token = get_next_dom_scanner_token(&scanner);
if (!token || token->type != CSS_TOKEN_IDENT) if (!token || token->type != CSS_TOKEN_IDENT)
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
sel.node.type = DOM_NODE_ATTRIBUTE; sel.node.type = DOM_NODE_ATTRIBUTE;
sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
set_dom_string(&sel.node.string, "class", -1); set_dom_string(&sel.node.string, "class", -1);
set_dom_string(&sel.node.data.attribute.value, token->string, token->length); copy_dom_string(&sel.node.data.attribute.value, &token->string);
break; break;
case ':': case ':':
@ -476,7 +473,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
return DOM_ERR_SYNTAX; return DOM_ERR_SYNTAX;
} }
skip_scanner_token(&scanner); skip_dom_scanner_token(&scanner);
if (sel.node.type == DOM_NODE_UNKNOWN) if (sel.node.type == DOM_NODE_UNKNOWN)
continue; continue;
@ -523,8 +520,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
/* Basically this is just a wrapper for parse_dom_select() to ease error /* Basically this is just a wrapper for parse_dom_select() to ease error
* handling. */ * handling. */
struct dom_select * struct dom_select *
init_dom_select(enum dom_select_syntax syntax, init_dom_select(enum dom_select_syntax syntax, struct dom_string *string)
unsigned char *string, int length)
{ {
struct dom_select *select = mem_calloc(1, sizeof(select)); struct dom_select *select = mem_calloc(1, sizeof(select));
struct dom_stack stack; struct dom_stack stack;
@ -532,7 +528,7 @@ init_dom_select(enum dom_select_syntax syntax,
init_dom_stack(&stack, DOM_STACK_KEEP_NODES); init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
code = parse_dom_select(select, &stack, string, length); code = parse_dom_select(select, &stack, string);
done_dom_stack(&stack); done_dom_stack(&stack);
if (code == DOM_ERR_NONE) if (code == DOM_ERR_NONE)

View File

@ -1,7 +1,7 @@
#ifndef EL__DOCUMENT_DOM_SELECT_H #ifndef EL_DOM_SELECT_H
#define EL__DOCUMENT_DOM_SELECT_H #define EL_DOM_SELECT_H
#include "document/dom/node.h" #include "dom/node.h"
/* FIXME: Namespaces; *|E */ /* FIXME: Namespaces; *|E */
@ -193,7 +193,7 @@ enum dom_select_syntax {
}; };
struct dom_select *init_dom_select(enum dom_select_syntax syntax, struct dom_select *init_dom_select(enum dom_select_syntax syntax,
unsigned char *string, int length); struct dom_string *string);
void done_dom_select(struct dom_select *select); void done_dom_select(struct dom_select *select);

View File

@ -9,8 +9,8 @@
#include "elinks.h" #include "elinks.h"
#include "document/sgml/html/html.h" #include "dom/sgml/html/html.h"
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
#define HTML_(node, name, id) SGML_NODE_INFO(HTML, node, name, id) #define HTML_(node, name, id) SGML_NODE_INFO(HTML, node, name, id)
@ -20,13 +20,13 @@
static struct sgml_node_info html_attributes[HTML_ATTRIBUTES] = { static struct sgml_node_info html_attributes[HTML_ATTRIBUTES] = {
SGML_NODE_HEAD(HTML, ATTRIBUTE), SGML_NODE_HEAD(HTML, ATTRIBUTE),
#include "document/sgml/html/attribute.inc" #include "dom/sgml/html/attribute.inc"
}; };
static struct sgml_node_info html_elements[HTML_ELEMENTS] = { static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
SGML_NODE_HEAD(HTML, ELEMENT), SGML_NODE_HEAD(HTML, ELEMENT),
#include "document/sgml/html/element.inc" #include "dom/sgml/html/element.inc"
}; };

View File

@ -1,8 +1,8 @@
#ifndef EL__DOCUMENT_SGML_HTML_HTML_H #ifndef EL_DOM_SGML_HTML_HTML_H
#define EL__DOCUMENT_SGML_HTML_HTML_H #define EL_DOM_SGML_HTML_HTML_H
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
extern struct sgml_info sgml_html_info; extern struct sgml_info sgml_html_info;
@ -13,7 +13,7 @@ extern struct sgml_info sgml_html_info;
enum html_element_type { enum html_element_type {
HTML_ELEMENT_UNKNOWN, HTML_ELEMENT_UNKNOWN,
#include "document/sgml/html/element.inc" #include "dom/sgml/html/element.inc"
HTML_ELEMENTS, HTML_ELEMENTS,
}; };
@ -21,7 +21,7 @@ enum html_element_type {
enum html_attribute_type { enum html_attribute_type {
HTML_ATTRIBUTE_UNKNOWN, HTML_ATTRIBUTE_UNKNOWN,
#include "document/sgml/html/attribute.inc" #include "dom/sgml/html/attribute.inc"
HTML_ATTRIBUTES, HTML_ATTRIBUTES,
}; };

View File

@ -9,20 +9,18 @@
#include "elinks.h" #include "elinks.h"
#include "document/dom/node.h" #include "dom/node.h"
#include "document/dom/stack.h" #include "dom/sgml/parser.h"
#include "document/sgml/parser.h" #include "dom/sgml/scanner.h"
#include "document/sgml/scanner.h" #include "dom/sgml/sgml.h"
#include "document/sgml/sgml.h" #include "dom/stack.h"
#include "protocol/uri.h" #include "dom/string.h"
#include "util/error.h" #include "util/error.h"
#include "util/lists.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/string.h"
static struct sgml_parsing_state * static struct sgml_parsing_state *
init_sgml_parsing_state(struct sgml_parser *parser, struct string *buffer); init_sgml_parsing_state(struct sgml_parser *parser, struct dom_string *buffer);
/* When getting the sgml_parser struct it is _always_ assumed that the parser /* When getting the sgml_parser struct it is _always_ assumed that the parser
@ -41,17 +39,15 @@ init_sgml_parsing_state(struct sgml_parser *parser, struct string *buffer);
* information like node subtypes and SGML parser state information. */ * information like node subtypes and SGML parser state information. */
static inline struct dom_node * static inline struct dom_node *
add_sgml_document(struct dom_stack *stack, struct uri *uri) add_sgml_document(struct dom_stack *stack, struct dom_string *string)
{ {
unsigned char *string = struri(uri); struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string);
size_t length = strlen(string);
struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string, length);
return node ? push_dom_node(stack, node) : NULL; return node ? push_dom_node(stack, node) : NULL;
} }
static inline struct dom_node * static inline struct dom_node *
add_sgml_element(struct dom_stack *stack, struct scanner_token *token) add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
{ {
struct sgml_parser *parser = get_sgml_parser(stack); struct sgml_parser *parser = get_sgml_parser(stack);
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
@ -60,7 +56,7 @@ add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
struct dom_node *node; struct dom_node *node;
struct sgml_node_info *node_info; struct sgml_node_info *node_info;
node = add_dom_element(parent, token->string, token->length); node = add_dom_element(parent, &token->string);
if (!node) return NULL; if (!node) return NULL;
node_info = get_sgml_node_info(parser->info->elements, node); node_info = get_sgml_node_info(parser->info->elements, node);
@ -81,17 +77,15 @@ add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
static inline void static inline void
add_sgml_attribute(struct dom_stack *stack, add_sgml_attribute(struct dom_stack *stack,
struct scanner_token *token, struct scanner_token *valtoken) struct dom_scanner_token *token, struct dom_scanner_token *valtoken)
{ {
struct sgml_parser *parser = get_sgml_parser(stack); struct sgml_parser *parser = get_sgml_parser(stack);
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
unsigned char *value = valtoken ? valtoken->string : NULL; struct dom_string *value = valtoken ? &valtoken->string : NULL;
size_t valuelen = valtoken ? valtoken->length : 0;
struct sgml_node_info *info; struct sgml_node_info *info;
struct dom_node *node; struct dom_node *node;
node = add_dom_attribute(parent, token->string, token->length, node = add_dom_attribute(parent, &token->string, value);
value, valuelen);
info = get_sgml_node_info(parser->info->attributes, node); info = get_sgml_node_info(parser->info->attributes, node);
@ -109,22 +103,23 @@ add_sgml_attribute(struct dom_stack *stack,
} }
static inline struct dom_node * static inline struct dom_node *
add_sgml_proc_instruction(struct dom_stack *stack, struct scanner_token *token) add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *token)
{ {
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
struct dom_node *node; struct dom_node *node;
/* Split the token in two if we can find a first space separator. */ /* Split the token in two if we can find a first space separator. */
unsigned char *separator = memchr(token->string, ' ', token->length); unsigned char *separator = memchr(token->string.string, ' ', token->string.length);
/* Anything before the separator becomes the target name ... */ /* Anything before the separator becomes the target name ... */
unsigned char *name = token->string; size_t namelen = separator ? separator - token->string.string : token->string.length;
size_t namelen = separator ? separator - token->string : token->length; struct dom_string name = INIT_DOM_STRING(token->string.string, namelen);
/* ... and everything after the instruction value. */ /* ... and everything after the instruction value. */
unsigned char *value = separator ? separator + 1 : NULL; unsigned char *valuestr = separator ? separator + 1 : NULL;
size_t valuelen = value ? token->length - namelen - 1 : 0; size_t valuelen = valuestr ? token->string.length - namelen - 1 : 0;
struct dom_string value = INIT_DOM_STRING(valuestr, valuelen);
node = add_dom_proc_instruction(parent, name, namelen, value, valuelen); node = add_dom_proc_instruction(parent, &name, &value);
if (!node) return NULL; if (!node) return NULL;
switch (token->type) { switch (token->type) {
@ -147,10 +142,10 @@ add_sgml_proc_instruction(struct dom_stack *stack, struct scanner_token *token)
} }
static inline void static inline void
add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct scanner_token *token) add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token)
{ {
struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node *parent = get_dom_stack_top(stack)->node;
struct dom_node *node = add_dom_node(parent, type, token->string, token->length); struct dom_node *node = add_dom_node(parent, type, &token->string);
if (!node) return; if (!node) return;
@ -165,24 +160,24 @@ add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct scanner_t
/* SGML parser main handling: */ /* SGML parser main handling: */
static inline void static inline void
parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner) parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
{ {
struct scanner_token name; struct dom_scanner_token name;
assert(scanner_has_tokens(scanner) assert(dom_scanner_has_tokens(scanner)
&& (get_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
|| get_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML)); || get_dom_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML));
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
while (scanner_has_tokens(scanner)) { while (dom_scanner_has_tokens(scanner)) {
struct scanner_token *token = get_scanner_token(scanner); struct dom_scanner_token *token = get_dom_scanner_token(scanner);
assert(token); assert(token);
switch (token->type) { switch (token->type) {
case SGML_TOKEN_TAG_END: case SGML_TOKEN_TAG_END:
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
/* and return */ /* and return */
case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT:
case SGML_TOKEN_ELEMENT_BEGIN: case SGML_TOKEN_ELEMENT_BEGIN:
@ -194,11 +189,11 @@ parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
copy_struct(&name, token); copy_struct(&name, token);
/* Skip the attribute name token */ /* Skip the attribute name token */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (token && token->type == '=') { if (token && token->type == '=') {
/* If the token is not a valid value token /* If the token is not a valid value token
* ignore it. */ * ignore it. */
token = get_next_scanner_token(scanner); token = get_next_dom_scanner_token(scanner);
if (token if (token
&& token->type != SGML_TOKEN_IDENT && token->type != SGML_TOKEN_IDENT
&& token->type != SGML_TOKEN_ATTRIBUTE && token->type != SGML_TOKEN_ATTRIBUTE
@ -212,28 +207,28 @@ parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
/* Skip the value token */ /* Skip the value token */
if (token) if (token)
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
default: default:
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
} }
} }
} }
static void static void
parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner) parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
{ {
while (scanner_has_tokens(scanner)) { while (dom_scanner_has_tokens(scanner)) {
struct scanner_token *token = get_scanner_token(scanner); struct dom_scanner_token *token = get_dom_scanner_token(scanner);
switch (token->type) { switch (token->type) {
case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT:
case SGML_TOKEN_ELEMENT_BEGIN: case SGML_TOKEN_ELEMENT_BEGIN:
if (!add_sgml_element(stack, token)) { if (!add_sgml_element(stack, token)) {
if (token->type == SGML_TOKEN_ELEMENT) { if (token->type == SGML_TOKEN_ELEMENT) {
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
} }
@ -244,24 +239,24 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
if (token->type == SGML_TOKEN_ELEMENT_BEGIN) { if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
parse_sgml_attributes(stack, scanner); parse_sgml_attributes(stack, scanner);
} else { } else {
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
} }
break; break;
case SGML_TOKEN_ELEMENT_EMPTY_END: case SGML_TOKEN_ELEMENT_EMPTY_END:
pop_dom_node(stack); pop_dom_node(stack);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_ELEMENT_END: case SGML_TOKEN_ELEMENT_END:
if (!token->length) { if (!token->string.length) {
pop_dom_node(stack); pop_dom_node(stack);
} else { } else {
struct dom_string string; struct dom_string string;
struct dom_stack_state *state; struct dom_stack_state *state;
set_dom_string(&string, token->string, token->length); set_dom_string(&string, token->string.string, token->string.length);
state = search_dom_stack(stack, DOM_NODE_ELEMENT, state = search_dom_stack(stack, DOM_NODE_ELEMENT,
&string); &string);
if (state) { if (state) {
@ -273,12 +268,12 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
pop_dom_state(stack, state); pop_dom_state(stack, state);
} }
} }
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_NOTATION_COMMENT: case SGML_TOKEN_NOTATION_COMMENT:
add_sgml_node(stack, DOM_NODE_COMMENT, token); add_sgml_node(stack, DOM_NODE_COMMENT, token);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_NOTATION_ATTLIST: case SGML_TOKEN_NOTATION_ATTLIST:
@ -286,12 +281,12 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
case SGML_TOKEN_NOTATION_ELEMENT: case SGML_TOKEN_NOTATION_ELEMENT:
case SGML_TOKEN_NOTATION_ENTITY: case SGML_TOKEN_NOTATION_ENTITY:
case SGML_TOKEN_NOTATION: case SGML_TOKEN_NOTATION:
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_CDATA_SECTION: case SGML_TOKEN_CDATA_SECTION:
add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token); add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_PROCESS_XML: case SGML_TOKEN_PROCESS_XML:
@ -306,30 +301,30 @@ parse_sgml_plain(struct dom_stack *stack, struct scanner *scanner)
case SGML_TOKEN_PROCESS: case SGML_TOKEN_PROCESS:
add_sgml_proc_instruction(stack, token); add_sgml_proc_instruction(stack, token);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_ENTITY: case SGML_TOKEN_ENTITY:
add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token); add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
break; break;
case SGML_TOKEN_SPACE: case SGML_TOKEN_SPACE:
case SGML_TOKEN_TEXT: case SGML_TOKEN_TEXT:
default: default:
add_sgml_node(stack, DOM_NODE_TEXT, token); add_sgml_node(stack, DOM_NODE_TEXT, token);
skip_scanner_token(scanner); skip_dom_scanner_token(scanner);
} }
} }
} }
struct dom_node * struct dom_node *
parse_sgml(struct sgml_parser *parser, struct string *buffer) parse_sgml(struct sgml_parser *parser, struct dom_string *buffer)
{ {
struct sgml_parsing_state *parsing; struct sgml_parsing_state *parsing;
if (!parser->root) { if (!parser->root) {
parser->root = add_sgml_document(&parser->stack, parser->uri); parser->root = add_sgml_document(&parser->stack, &parser->uri);
if (!parser->root) if (!parser->root)
return NULL; return NULL;
get_dom_stack_top(&parser->stack)->immutable = 1; get_dom_stack_top(&parser->stack)->immutable = 1;
@ -360,12 +355,10 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
{ {
struct sgml_parser *parser = get_sgml_parser(stack); struct sgml_parser *parser = get_sgml_parser(stack);
struct sgml_parsing_state *parsing = data; struct sgml_parsing_state *parsing = data;
unsigned char *source = node->string.string;
unsigned char *end = source + node->string.length;
parsing->depth = parser->stack.depth; parsing->depth = parser->stack.depth;
get_dom_stack_top(&parser->stack)->immutable = 1; get_dom_stack_top(&parser->stack)->immutable = 1;
init_scanner(&parsing->scanner, &sgml_scanner_info, source, end); init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string);
} }
static void static void
@ -422,12 +415,12 @@ static struct dom_stack_context_info sgml_parsing_context_info = {
/* Create a new parsing state by pushing a new text node containing the*/ /* Create a new parsing state by pushing a new text node containing the*/
static struct sgml_parsing_state * static struct sgml_parsing_state *
init_sgml_parsing_state(struct sgml_parser *parser, struct string *buffer) init_sgml_parsing_state(struct sgml_parser *parser, struct dom_string *buffer)
{ {
struct dom_stack_state *state; struct dom_stack_state *state;
struct dom_node *node; struct dom_node *node;
node = init_dom_node(DOM_NODE_TEXT, buffer->source, buffer->length); node = init_dom_node(DOM_NODE_TEXT, buffer);
if (!node || !push_dom_node(&parser->parsing, node)) if (!node || !push_dom_node(&parser->parsing, node))
return NULL; return NULL;
@ -479,7 +472,7 @@ static struct dom_stack_context_info sgml_parser_context_info = {
struct sgml_parser * struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype, init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
struct uri *uri) struct dom_string *uri)
{ {
struct sgml_parser *parser; struct sgml_parser *parser;
enum dom_stack_flag flags = 0; enum dom_stack_flag flags = 0;
@ -487,8 +480,12 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
parser = mem_calloc(1, sizeof(*parser)); parser = mem_calloc(1, sizeof(*parser));
if (!parser) return NULL; if (!parser) return NULL;
if (!init_dom_string(&parser->uri, uri->string, uri->length)) {
mem_free(parser);
return NULL;
}
parser->type = type; parser->type = type;
parser->uri = get_uri_reference(uri);
parser->info = get_sgml_info(doctype); parser->info = get_sgml_info(doctype);
if (type == SGML_PARSER_TREE) if (type == SGML_PARSER_TREE)
@ -511,6 +508,6 @@ done_sgml_parser(struct sgml_parser *parser)
{ {
done_dom_stack(&parser->stack); done_dom_stack(&parser->stack);
done_dom_stack(&parser->parsing); done_dom_stack(&parser->parsing);
done_uri(parser->uri); done_dom_string(&parser->uri);
mem_free(parser); mem_free(parser);
} }

View File

@ -1,11 +1,11 @@
#ifndef EL__DOCUMENT_SGML_PARSER_H #ifndef EL_DOM_SGML_PARSER_H
#define EL__DOCUMENT_SGML_PARSER_H #define EL_DOM_SGML_PARSER_H
#include "document/dom/node.h" #include "dom/node.h"
#include "document/dom/stack.h" #include "dom/stack.h"
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
#include "util/scanner.h" #include "dom/scanner.h"
struct string; struct string;
struct uri; struct uri;
@ -27,7 +27,7 @@ enum sgml_parser_type {
* used to feed output of stuff like ECMAScripts document.write() from * used to feed output of stuff like ECMAScripts document.write() from
* <script>-elements back to the SGML parser. */ * <script>-elements back to the SGML parser. */
struct sgml_parsing_state { struct sgml_parsing_state {
struct scanner scanner; struct dom_scanner scanner;
struct dom_node *node; struct dom_node *node;
size_t depth; size_t depth;
}; };
@ -37,7 +37,7 @@ struct sgml_parser {
struct sgml_info *info; /* Backend dependent info */ struct sgml_info *info; /* Backend dependent info */
struct uri *uri; /* The URI of the DOM document */ struct dom_string uri; /* The URI of the DOM document */
struct dom_node *root; /* The document root node */ struct dom_node *root; /* The document root node */
struct dom_stack stack; /* A stack for tracking parsed nodes */ struct dom_stack stack; /* A stack for tracking parsed nodes */
@ -48,15 +48,15 @@ struct sgml_parser_state {
struct sgml_node_info *info; struct sgml_node_info *info;
/* This is used by the DOM source renderer for highlighting the /* This is used by the DOM source renderer for highlighting the
* end-tag of an element. */ * end-tag of an element. */
struct scanner_token end_token; struct dom_scanner_token end_token;
}; };
struct sgml_parser * struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype, init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
struct uri *uri); struct dom_string *uri);
void done_sgml_parser(struct sgml_parser *parser); void done_sgml_parser(struct sgml_parser *parser);
struct dom_node *parse_sgml(struct sgml_parser *parser, struct string *buffer); struct dom_node *parse_sgml(struct sgml_parser *parser, struct dom_string *buffer);
#endif #endif

View File

@ -9,8 +9,8 @@
#include "elinks.h" #include "elinks.h"
#include "document/sgml/rss/rss.h" #include "dom/sgml/rss/rss.h"
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
#define RSS_(node, name, id) SGML_NODE_INFO(RSS, node, name, id) #define RSS_(node, name, id) SGML_NODE_INFO(RSS, node, name, id)
@ -18,13 +18,13 @@
static struct sgml_node_info rss_attributes[RSS_ATTRIBUTES] = { static struct sgml_node_info rss_attributes[RSS_ATTRIBUTES] = {
SGML_NODE_HEAD(RSS, ATTRIBUTE), SGML_NODE_HEAD(RSS, ATTRIBUTE),
#include "document/sgml/rss/attribute.inc" #include "dom/sgml/rss/attribute.inc"
}; };
static struct sgml_node_info rss_elements[RSS_ELEMENTS] = { static struct sgml_node_info rss_elements[RSS_ELEMENTS] = {
SGML_NODE_HEAD(RSS, ELEMENT), SGML_NODE_HEAD(RSS, ELEMENT),
#include "document/sgml/rss/element.inc" #include "dom/sgml/rss/element.inc"
}; };

View File

@ -1,7 +1,7 @@
#ifndef EL__DOCUMENT_SGML_RSS_RSS_H #ifndef EL_DOM_SGML_RSS_RSS_H
#define EL__DOCUMENT_SGML_RSS_RSS_H #define EL_DOM_SGML_RSS_RSS_H
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
extern struct sgml_info sgml_rss_info; extern struct sgml_info sgml_rss_info;
@ -10,7 +10,7 @@ extern struct sgml_info sgml_rss_info;
enum rss_element_type { enum rss_element_type {
RSS_ELEMENT_UNKNOWN, RSS_ELEMENT_UNKNOWN,
#include "document/sgml/rss/element.inc" #include "dom/sgml/rss/element.inc"
RSS_ELEMENTS, RSS_ELEMENTS,
}; };
@ -18,7 +18,7 @@ enum rss_element_type {
enum rss_attribute_type { enum rss_attribute_type {
RSS_ATTRIBUTE_UNKNOWN, RSS_ATTRIBUTE_UNKNOWN,
#include "document/sgml/rss/attribute.inc" #include "dom/sgml/rss/attribute.inc"
RSS_ATTRIBUTES, RSS_ATTRIBUTES,
}; };

View File

@ -9,10 +9,10 @@
#include "elinks.h" #include "elinks.h"
#include "document/sgml/scanner.h" #include "dom/scanner.h"
#include "dom/sgml/scanner.h"
#include "dom/string.h"
#include "util/error.h" #include "util/error.h"
#include "util/scanner.h"
#include "util/string.h"
/* Bitmap entries for the SGML character groups used in the scanner table */ /* Bitmap entries for the SGML character groups used in the scanner table */
@ -34,38 +34,41 @@ enum sgml_char_group {
SGML_CHAR_NOT_ATTRIBUTE = (1 << 6), SGML_CHAR_NOT_ATTRIBUTE = (1 << 6),
}; };
static struct scan_table_info sgml_scan_table_info[] = { static struct dom_scan_table_info sgml_scan_table_info[] = {
SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY), DOM_SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY), DOM_SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY), DOM_SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
/* For the octal number impared (me including) \241 is 161 --jonas */ /* For the octal number impared (me including) \241 is 161 --jonas */
SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY), DOM_SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY), DOM_SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY), DOM_SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE), DOM_SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE), DOM_SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT), DOM_SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE), DOM_SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
SCAN_TABLE_END, DOM_SCAN_TABLE_END,
}; };
static struct scanner_string_mapping sgml_string_mappings[] = { #define SGML_STRING_MAP(str, type, family) \
{ "--", SGML_TOKEN_NOTATION_COMMENT, SGML_TOKEN_NOTATION }, { INIT_DOM_STRING(str, -1), SGML_TOKEN_##type, SGML_TOKEN_##family }
{ "ATTLIST", SGML_TOKEN_NOTATION_ATTLIST, SGML_TOKEN_NOTATION },
{ "DOCTYPE", SGML_TOKEN_NOTATION_DOCTYPE, SGML_TOKEN_NOTATION },
{ "ELEMENT", SGML_TOKEN_NOTATION_ELEMENT, SGML_TOKEN_NOTATION },
{ "ENTITY", SGML_TOKEN_NOTATION_ENTITY, SGML_TOKEN_NOTATION },
{ "xml", SGML_TOKEN_PROCESS_XML, SGML_TOKEN_PROCESS }, static struct dom_scanner_string_mapping sgml_string_mappings[] = {
SGML_STRING_MAP("--", NOTATION_COMMENT, NOTATION),
SGML_STRING_MAP("ATTLIST", NOTATION_ATTLIST, NOTATION),
SGML_STRING_MAP("DOCTYPE", NOTATION_DOCTYPE, NOTATION),
SGML_STRING_MAP("ELEMENT", NOTATION_ELEMENT, NOTATION),
SGML_STRING_MAP("ENTITY", NOTATION_ENTITY, NOTATION),
{ NULL, SGML_TOKEN_NONE, SGML_TOKEN_NONE }, SGML_STRING_MAP("xml", PROCESS_XML, PROCESS),
DOM_STRING_MAP_END,
}; };
static struct scanner_token *scan_sgml_tokens(struct scanner *scanner); static struct dom_scanner_token *scan_sgml_tokens(struct dom_scanner *scanner);
struct scanner_info sgml_scanner_info = { struct dom_scanner_info sgml_scanner_info = {
sgml_string_mappings, sgml_string_mappings,
sgml_scan_table_info, sgml_scan_table_info,
scan_sgml_tokens, scan_sgml_tokens,
@ -91,7 +94,7 @@ struct scanner_info sgml_scanner_info = {
for (; ((str) < (scanner)->end && *(str) != '<' && *(str) != '&'); (str)++) for (; ((str) < (scanner)->end && *(str) != '<' && *(str) != '&'); (str)++)
static inline void static inline void
scan_sgml_text_token(struct scanner *scanner, struct scanner_token *token) scan_sgml_text_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{ {
unsigned char *string = scanner->position; unsigned char *string = scanner->position;
unsigned char first_char = *string; unsigned char first_char = *string;
@ -101,14 +104,14 @@ scan_sgml_text_token(struct scanner *scanner, struct scanner_token *token)
/* In scan_sgml_tokens() we check that first_char != '<' */ /* In scan_sgml_tokens() we check that first_char != '<' */
assert(first_char != '<' && scanner->state == SGML_STATE_TEXT); assert(first_char != '<' && scanner->state == SGML_STATE_TEXT);
token->string = string++; token->string.string = string++;
if (first_char == '&') { if (first_char == '&') {
if (is_sgml_entity(*string)) { if (is_sgml_entity(*string)) {
scan_sgml(scanner, string, SGML_CHAR_ENTITY); scan_sgml(scanner, string, SGML_CHAR_ENTITY);
type = SGML_TOKEN_ENTITY; type = SGML_TOKEN_ENTITY;
token->string++; token->string.string++;
real_length = string - token->string; real_length = string - token->string.string;
} }
foreach_sgml_cdata (scanner, string) { foreach_sgml_cdata (scanner, string) {
@ -133,7 +136,7 @@ scan_sgml_text_token(struct scanner *scanner, struct scanner_token *token)
} }
token->type = type; token->type = type;
token->length = real_length >= 0 ? real_length : string - token->string; token->string.length = real_length >= 0 ? real_length : string - token->string.string;
token->precedence = get_sgml_precedence(type); token->precedence = get_sgml_precedence(type);
scanner->position = string; scanner->position = string;
} }
@ -151,7 +154,7 @@ check_sgml_precedence(int type, int skipto)
/* XXX: Only element or ``in tag'' precedence is handled correctly however /* XXX: Only element or ``in tag'' precedence is handled correctly however
* using this function for CDATA or text would be overkill. */ * using this function for CDATA or text would be overkill. */
static inline unsigned char * static inline unsigned char *
skip_sgml(struct scanner *scanner, unsigned char **string, unsigned char skipto, skip_sgml(struct dom_scanner *scanner, unsigned char **string, unsigned char skipto,
int check_quoting) int check_quoting)
{ {
unsigned char *pos = *string; unsigned char *pos = *string;
@ -178,7 +181,7 @@ skip_sgml(struct scanner *scanner, unsigned char **string, unsigned char skipto,
} }
static inline int static inline int
skip_comment(struct scanner *scanner, unsigned char **string) skip_comment(struct dom_scanner *scanner, unsigned char **string)
{ {
unsigned char *pos = *string; unsigned char *pos = *string;
int length = 0; int length = 0;
@ -195,7 +198,7 @@ skip_comment(struct scanner *scanner, unsigned char **string)
} }
static inline int static inline int
skip_cdata_section(struct scanner *scanner, unsigned char **string) skip_cdata_section(struct dom_scanner *scanner, unsigned char **string)
{ {
unsigned char *pos = *string; unsigned char *pos = *string;
int length = 0; int length = 0;
@ -216,14 +219,14 @@ skip_cdata_section(struct scanner *scanner, unsigned char **string)
(str)++; (str)++;
static inline void static inline void
scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token) scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{ {
unsigned char *string = scanner->position; unsigned char *string = scanner->position;
unsigned char first_char = *string; unsigned char first_char = *string;
enum sgml_token_type type = SGML_TOKEN_GARBAGE; enum sgml_token_type type = SGML_TOKEN_GARBAGE;
int real_length = -1; int real_length = -1;
token->string = string++; token->string.string = string++;
if (first_char == '<') { if (first_char == '<') {
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE); scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
@ -237,10 +240,10 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
scanner->state = SGML_STATE_TEXT; scanner->state = SGML_STATE_TEXT;
} else if (is_sgml_ident(*string)) { } else if (is_sgml_ident(*string)) {
token->string = string; token->string.string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT); scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string; real_length = string - token->string.string;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE); scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
if (*string == '>') { if (*string == '>') {
@ -257,13 +260,13 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
string++; string++;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE); scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
token->string = ident = string; token->string.string = ident = string;
if (string + 1 < scanner->end if (string + 1 < scanner->end
&& string[0] == '-' && string[1] == '-') { && string[0] == '-' && string[1] == '-') {
string += 2; string += 2;
type = SGML_TOKEN_NOTATION_COMMENT; type = SGML_TOKEN_NOTATION_COMMENT;
token->string = string; token->string.string = string;
real_length = skip_comment(scanner, &string); real_length = skip_comment(scanner, &string);
assert(real_length >= 0); assert(real_length >= 0);
@ -272,13 +275,13 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
string += 7; string += 7;
type = SGML_TOKEN_CDATA_SECTION; type = SGML_TOKEN_CDATA_SECTION;
token->string = string; token->string.string = string;
real_length = skip_cdata_section(scanner, &string); real_length = skip_cdata_section(scanner, &string);
assert(real_length >= 0); assert(real_length >= 0);
} else { } else {
scan_sgml(scanner, string, SGML_CHAR_IDENT); scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = map_scanner_string(scanner, ident, string, base); type = map_dom_scanner_string(scanner, ident, string, base);
skip_sgml(scanner, &string, '>', 0); skip_sgml(scanner, &string, '>', 0);
} }
@ -288,10 +291,10 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
string++; string++;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE); scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
token->string = pos = string; token->string.string = pos = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT); scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = map_scanner_string(scanner, pos, string, base); type = map_dom_scanner_string(scanner, pos, string, base);
/* Figure out where the processing instruction ends */ /* Figure out where the processing instruction ends */
for (pos = string; skip_sgml(scanner, &pos, '>', 0); ) { for (pos = string; skip_sgml(scanner, &pos, '>', 0); ) {
@ -299,7 +302,7 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
/* Set length until '?' char and move position /* Set length until '?' char and move position
* beyond '>'. */ * beyond '>'. */
real_length = pos - token->string - 2; real_length = pos - token->string.string - 2;
break; break;
} }
@ -320,9 +323,9 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE); scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
if (is_sgml_ident(*string)) { if (is_sgml_ident(*string)) {
token->string = string; token->string.string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT); scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string; real_length = string - token->string.string;
type = SGML_TOKEN_ELEMENT_END; type = SGML_TOKEN_ELEMENT_END;
skip_sgml(scanner, &string, '>', 1); skip_sgml(scanner, &string, '>', 1);
@ -371,12 +374,12 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
if (string_end) { if (string_end) {
/* We don't want the delimiters in the token */ /* We don't want the delimiters in the token */
token->string++; token->string.string++;
real_length = string_end - token->string; real_length = string_end - token->string.string;
string = string_end + 1; string = string_end + 1;
type = SGML_TOKEN_STRING; type = SGML_TOKEN_STRING;
} else if (is_sgml_attribute(*string)) { } else if (is_sgml_attribute(*string)) {
token->string++; token->string.string++;
scan_sgml_attribute(scanner, string); scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE; type = SGML_TOKEN_ATTRIBUTE;
} }
@ -394,7 +397,7 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
} }
token->type = type; token->type = type;
token->length = real_length >= 0 ? real_length : string - token->string; token->string.length = real_length >= 0 ? real_length : string - token->string.string;
token->precedence = get_sgml_precedence(type); token->precedence = get_sgml_precedence(type);
scanner->position = string; scanner->position = string;
} }
@ -402,14 +405,14 @@ scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
/* Scanner multiplexor */ /* Scanner multiplexor */
static struct scanner_token * static struct dom_scanner_token *
scan_sgml_tokens(struct scanner *scanner) scan_sgml_tokens(struct dom_scanner *scanner)
{ {
struct scanner_token *table_end = scanner->table + SCANNER_TOKENS; struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
struct scanner_token *current; struct dom_scanner_token *current;
if (!begin_token_scanning(scanner)) if (!begin_dom_token_scanning(scanner))
return get_scanner_token(scanner); return get_dom_scanner_token(scanner);
/* Scan tokens until we fill the table */ /* Scan tokens until we fill the table */
for (current = scanner->table + scanner->tokens; for (current = scanner->table + scanner->tokens;
@ -431,5 +434,5 @@ scan_sgml_tokens(struct scanner *scanner)
} }
} }
return end_token_scanning(scanner, current); return end_dom_token_scanning(scanner, current);
} }

View File

@ -1,8 +1,8 @@
#ifndef EL__DOCUMENT_SGML_SCANNER_H #ifndef EL_DOM_SGML_SCANNER_H
#define EL__DOCUMENT_SGML_SCANNER_H #define EL_DOM_SGML_SCANNER_H
#include "util/scanner.h" #include "dom/scanner.h"
enum sgml_token_type { enum sgml_token_type {
/* Char tokens: */ /* Char tokens: */
@ -56,7 +56,7 @@ enum sgml_token_type {
SGML_TOKEN_NONE = 0, SGML_TOKEN_NONE = 0,
}; };
extern struct scanner_info sgml_scanner_info; extern struct dom_scanner_info sgml_scanner_info;
/* Treat '<' as more valuable then '>' so that scanning of '<a<b>' using /* Treat '<' as more valuable then '>' so that scanning of '<a<b>' using
* skipping to next '>' will stop at the second '<'. */ * skipping to next '>' will stop at the second '<'. */
@ -65,6 +65,6 @@ extern struct scanner_info sgml_scanner_info;
(token_type) == '>' ? (1 << 10) : 0) (token_type) == '>' ? (1 << 10) : 0)
#define skip_sgml_tokens(scanner, type) \ #define skip_sgml_tokens(scanner, type) \
skip_scanner_tokens(scanner, type, get_sgml_precedence(type)) skip_dom_scanner_tokens(scanner, type, get_sgml_precedence(type))
#endif #endif

View File

@ -9,15 +9,15 @@
#include "elinks.h" #include "elinks.h"
#include "document/dom/node.h" #include "dom/node.h"
#include "document/sgml/sgml.h" #include "dom/sgml/sgml.h"
#include "dom/string.h"
#include "util/error.h" #include "util/error.h"
#include "util/string.h"
/* Backend includes: */ /* Backend includes: */
#include "document/sgml/html/html.h" #include "dom/sgml/html/html.h"
#include "document/sgml/rss/rss.h" #include "dom/sgml/rss/rss.h"
int int

View File

@ -1,11 +1,11 @@
#ifndef EL__DOCUMENT_SGML_SGML_H #ifndef EL_DOM_SGML_SGML_H
#define EL__DOCUMENT_SGML_SGML_H #define EL_DOM_SGML_SGML_H
#include <stdlib.h> #include <stdlib.h>
#include "document/dom/stack.h" #include "dom/stack.h"
#include "document/dom/string.h" #include "dom/string.h"
/* The flags stored in the attribute sgml node info data */ /* The flags stored in the attribute sgml node info data */
/* TODO: Other potential flags (there can be only 16) /* TODO: Other potential flags (there can be only 16)

View File

@ -9,10 +9,10 @@
#include "elinks.h" #include "elinks.h"
#include "document/dom/node.h" #include "dom/node.h"
#include "document/dom/stack.h" #include "dom/stack.h"
#include "dom/string.h"
#include "util/memory.h" #include "util/memory.h"
#include "util/string.h"
/* Navigator states */ /* Navigator states */
@ -463,11 +463,7 @@ set_enhanced_dom_node_value(struct dom_string *string, struct dom_node *node)
switch (node->type) { switch (node->type) {
case DOM_NODE_ENTITY_REFERENCE: case DOM_NODE_ENTITY_REFERENCE:
/* XXX: The ASCII codepage is hardcoded here since we do not /* FIXME: Set to the entity value. */
* want to depend on anything and this is really just for
* debugging. */
string->string = get_entity_string(node->string.string,
node->string.length, 0);
string->string = null_or_stracpy(string->string); string->string = null_or_stracpy(string->string);
break; break;
@ -573,7 +569,7 @@ struct dom_stack_context_info dom_stack_trace_context_info = {
/* DOM_NODE_ENTITY */ dom_stack_trace_id_leaf, /* DOM_NODE_ENTITY */ dom_stack_trace_id_leaf,
/* DOM_NODE_PROC_INSTRUCTION */ dom_stack_trace_id_leaf, /* DOM_NODE_PROC_INSTRUCTION */ dom_stack_trace_id_leaf,
/* DOM_NODE_COMMENT */ dom_stack_trace_leaf, /* DOM_NODE_COMMENT */ dom_stack_trace_leaf,
/* DOM_NODE_DOCUMENT */ dom_stack_trace, /* DOM_NODE_DOCUMENT */ dom_stack_trace_tree,
/* DOM_NODE_DOCUMENT_TYPE */ dom_stack_trace_id_leaf, /* DOM_NODE_DOCUMENT_TYPE */ dom_stack_trace_id_leaf,
/* DOM_NODE_DOCUMENT_FRAGMENT */ dom_stack_trace_id_leaf, /* DOM_NODE_DOCUMENT_FRAGMENT */ dom_stack_trace_id_leaf,
/* DOM_NODE_NOTATION */ dom_stack_trace_id_leaf, /* DOM_NODE_NOTATION */ dom_stack_trace_id_leaf,

View File

@ -1,7 +1,7 @@
#ifndef EL__DOCUMENT_DOM_STACK_H #ifndef EL_DOM_STACK_H
#define EL__DOCUMENT_DOM_STACK_H #define EL_DOM_STACK_H
#include "document/dom/node.h" #include "dom/node.h"
#include "util/error.h" #include "util/error.h"
#include "util/hash.h" #include "util/hash.h"

View File

@ -1,5 +1,7 @@
#ifndef EL__DOCUMENT_DOM_STRING_H #ifndef EL_DOM_STRING_H
#define EL__DOCUMENT_DOM_STRING_H #define EL_DOM_STRING_H
#include "util/memory.h"
struct dom_string { struct dom_string {
size_t length; size_t length;
@ -17,7 +19,7 @@ set_dom_string(struct dom_string *string, unsigned char *value, size_t length)
} }
static inline int static inline int
dom_string_casecmp(struct dom_string *string1, struct dom_string *string2) dom_string_casecmp(const struct dom_string *string1, const struct dom_string *string2)
{ {
size_t length = int_min(string1->length, string2->length); size_t length = int_min(string1->length, string2->length);
size_t string_diff = strncasecmp(string1->string, string2->string, length); size_t string_diff = strncasecmp(string1->string, string2->string, length);
@ -33,8 +35,26 @@ dom_string_ncasecmp(struct dom_string *string1, struct dom_string *string2, size
return strncasecmp(string1->string, string2->string, length); return strncasecmp(string1->string, string2->string, length);
} }
#define copy_dom_string(string1, string2) \
set_dom_string(string1, (string2)->string, (string2)->length)
/* Make @string own a newly allocated copy of the first @len bytes of @str.
 * The copy is NUL-terminated and @string's length is set to @len.
 * Returns @string, or NULL if the allocation failed. */
static inline struct dom_string *
init_dom_string(struct dom_string *string, unsigned char *str, size_t len)
{
	unsigned char *copy = mem_alloc(len + 1);

	/* Store the result first so that a failed allocation leaves the
	 * string pointer NULL. */
	string->string = copy;
	if (copy) {
		memcpy(copy, str, len);
		copy[len] = 0;
		string->length = len;

		return string;
	}

	return NULL;
}
#define is_dom_string_set(str) ((str)->string && (str)->length) #define is_dom_string_set(str) ((str)->string && (str)->length)
#define done_dom_string(str) mem_free((str)->string); #define done_dom_string(str) mem_free((str)->string);
#define isquote(c) ((c) == '"' || (c) == '\'')
#endif #endif

22
src/dom/test/Makefile Normal file
View File

@ -0,0 +1,22 @@
# Build rules for the standalone SGML parser test program.
top_builddir=../../..
include $(top_builddir)/Makefile.config
PROG = sgml-parser
# Objects from the rest of the source tree that the test program is linked
# against.
TESTDEPS = \
$(top_builddir)/src/dom/lib.o \
$(top_builddir)/src/util/error.o \
$(top_builddir)/src/osdep/stub.o \
$(top_builddir)/src/util/hash.o \
$(top_builddir)/src/util/memdebug.o \
$(top_builddir)/src/util/memory.o
sgml-parser: $(TESTDEPS) sgml-parser.o
$(call cmd,link)
# Build the test program and run it.
test: sgml-parser
./sgml-parser
CLEAN += sgml-parser.o
include $(top_srcdir)/Makefile.lib

199
src/dom/test/sgml-parser.c Normal file
View File

@ -0,0 +1,199 @@
/* Tool for testing the SGML parser */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "dom/node.h"
#include "dom/sgml/parser.h"
#include "dom/stack.h"
/* Print @string on a single line: newline, carriage return and tab bytes
 * are written as the two-character sequences "\n", "\r" and "\t"; every
 * other byte is written unchanged. */
static void
print_compressed_string(struct dom_string *string)
{
	size_t pos;

	for (pos = 0; pos < string->length; pos++) {
		unsigned char data = string->string[pos];
		unsigned char escaped;

		switch (data) {
		case '\n': escaped = 'n'; break;
		case '\r': escaped = 'r'; break;
		case '\t': escaped = 't'; break;
		default:
			putchar(data);
			continue;
		}

		/* Write the escape byte by byte instead of through "%s", so
		 * no NUL-terminated scratch buffer is needed. */
		putchar('\\');
		putchar(escaped);
	}
}
/* Print the value of the given @node. An entity reference is printed as the
 * node's own string (it is not expanded yet). For any other node type the
 * value from get_dom_node_value() is printed in compressed form, or
 * "(no value)" when that call returns NULL. */
static void
print_dom_node_value(struct dom_node *node)
{
	struct dom_string *value;

	assert(node);

	switch (node->type) {
	case DOM_NODE_ENTITY_REFERENCE:
		/* FIXME: Set to the entity value. */
		/* The "%.*s" precision has to be an int, while the string
		 * length is a size_t. */
		printf("%.*s", (int) node->string.length,
		       (char *) node->string.string);
		break;

	default:
		value = get_dom_node_value(node);
		if (!value) {
			printf("(no value)");
			return;
		}

		print_compressed_string(value);
	}
}
static unsigned char indent_string[] =
" ";
/* Number of characters of indent_string to print for the current depth of
 * @stack: two per level, capped at the size of indent_string. The result is
 * cast to int because it is used as a "%.*s" precision argument, which must
 * be an int rather than a size_t. */
#define get_indent_offset(stack) \
	((int) ((stack)->depth < sizeof(indent_string)/2 \
		? (stack)->depth * 2 : sizeof(indent_string)))
/* Stack push callback: prints an indented line with the name of @node
 * followed by the node's own string. @data is unused. */
static void
sgml_parser_test_tree(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_string *value;
	struct dom_string *name;

	assert(node);

	value = &node->string;
	name = get_dom_node_name(node);

	/* Each "%.*s" precision has to be an int; the lengths are size_t. */
	printf("%.*s %.*s: %.*s\n",
	       (int) get_indent_offset(stack), (char *) indent_string,
	       (int) name->length, (char *) name->string,
	       (int) value->length, (char *) value->string);
}
/* Stack push callback: prints an indented line with the type name of @node,
 * the node name and, after " -> ", the node value. @data is unused. */
static void
sgml_parser_test_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_string *name;
	struct dom_string *id;

	assert(node);

	name = get_dom_node_name(node);
	id = get_dom_node_type_name(node->type);

	/* Each "%.*s" precision has to be an int; the lengths are size_t. */
	printf("%.*s %.*s: %.*s -> ",
	       (int) get_indent_offset(stack), (char *) indent_string,
	       (int) id->length, (char *) id->string,
	       (int) name->length, (char *) name->string);
	print_dom_node_value(node);
	printf("\n");
}
/* Stack push callback: prints an indented line with the name of @node
 * followed by the node value. @data is unused. */
static void
sgml_parser_test_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_string *name;

	assert(node);

	name = get_dom_node_name(node);

	/* Each "%.*s" precision has to be an int; the lengths are size_t. */
	printf("%.*s %.*s: ",
	       (int) get_indent_offset(stack), (char *) indent_string,
	       (int) name->length, (char *) name->string);
	print_dom_node_value(node);
	printf("\n");
}
/* Stack push callback: prints an indented line with the type name of @node
 * followed by the node name. @data is unused. */
static void
sgml_parser_test_branch(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_string *name;
	struct dom_string *id;

	assert(node);

	name = get_dom_node_name(node);
	id = get_dom_node_type_name(node->type);

	/* Each "%.*s" precision has to be an int; the lengths are size_t. */
	printf("%.*s %.*s: %.*s\n",
	       (int) get_indent_offset(stack), (char *) indent_string,
	       (int) id->length, (char *) id->string,
	       (int) name->length, (char *) name->string);
}
/* Stack context used by the test program. Each node type listed below gets a
 * push callback that prints the node; no pop callbacks are installed. The
 * entries are positional, one per node type in the order given by the
 * per-entry comments, with an unused leading slot. */
struct dom_stack_context_info sgml_parser_test_context_info = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ sgml_parser_test_branch,
/* DOM_NODE_ATTRIBUTE */ sgml_parser_test_id_leaf,
/* DOM_NODE_TEXT */ sgml_parser_test_leaf,
/* DOM_NODE_CDATA_SECTION */ sgml_parser_test_id_leaf,
/* DOM_NODE_ENTITY_REFERENCE */ sgml_parser_test_id_leaf,
/* DOM_NODE_ENTITY */ sgml_parser_test_id_leaf,
/* DOM_NODE_PROC_INSTRUCTION */ sgml_parser_test_id_leaf,
/* DOM_NODE_COMMENT */ sgml_parser_test_leaf,
/* DOM_NODE_DOCUMENT */ sgml_parser_test_tree,
/* DOM_NODE_DOCUMENT_TYPE */ sgml_parser_test_id_leaf,
/* DOM_NODE_DOCUMENT_FRAGMENT */ sgml_parser_test_id_leaf,
/* DOM_NODE_NOTATION */ sgml_parser_test_id_leaf,
},
/* Pop: */
/* Nothing is printed when a node is popped. */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
/* Parse a small hard-coded HTML document with a stream parser, printing
 * every pushed node through sgml_parser_test_context_info.
 *
 * Returns EXIT_SUCCESS when parse_sgml() produced a document root and
 * EXIT_FAILURE when the parser could not be created or parsing returned
 * NULL, so that whoever runs the program can detect the failure. */
int
main(int argc, char *argv[])
{
	struct dom_node *root;
	struct sgml_parser *parser;
	enum sgml_document_type doctype = SGML_DOCTYPE_HTML;
	struct dom_string uri = INIT_DOM_STRING("dom://test", -1);
	struct dom_string buffer = INIT_DOM_STRING("<html><body><p>Hello World!</p></body></html>", -1);
	int status = EXIT_FAILURE;

	parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri);
	if (!parser) return EXIT_FAILURE;

	add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);

	root = parse_sgml(parser, &buffer);
	if (root) {
		/* Record success now: the root node must not be inspected
		 * again once it has been popped below. */
		status = EXIT_SUCCESS;

		assert(parser->stack.depth == 1);

		/* The root state has to be made mutable again before it can
		 * be popped. */
		get_dom_stack_top(&parser->stack)->immutable = 0;
		/* For SGML_PARSER_STREAM this will free the DOM
		 * root node. */
		pop_dom_node(&parser->stack);
	}

	done_sgml_parser(parser);

	return status;
}

View File

@ -551,7 +551,8 @@ set_mem_comment(void *ptr, unsigned char *str, int len)
ah->comment = malloc(len + 1); ah->comment = malloc(len + 1);
if (ah->comment) { if (ah->comment) {
safe_strncpy(ah->comment, str, len + 1); memcpy(ah->comment, str, len);
ah->comment[len] = 0;
mem_stats.true_amount += len + 1; mem_stats.true_amount += len + 1;
} }
} }