1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-09-28 03:06:20 -04:00

Add basic functionality for selecting DOM nodes based on CSS3 selectors

The design should more or less be in place. There are still a lot of things
missing, but it should actually be enough for use in a simple RSS renderer.

Among other things, the :nth-* pseudo-classes and the :not() syntax are not yet
in place.
This commit is contained in:
Jonas Fonseca 2005-12-19 03:44:18 +01:00 committed by Jonas Fonseca
parent b64e122159
commit 2e2c0a590e
3 changed files with 1219 additions and 1 deletions

View File

@ -1,6 +1,6 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
OBJS = node.o renderer.o stack.o
OBJS = node.o renderer.o select.o stack.o
include $(top_srcdir)/Makefile.lib

861
src/document/dom/select.c Normal file
View File

@ -0,0 +1,861 @@
/* DOM node selection */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "elinks.h"
#include "document/css/scanner.h"
#include "document/dom/dom.h"
#include "document/dom/node.h"
#include "document/dom/select.h"
#include "document/dom/stack.h"
#include "util/memory.h"
#include "util/scanner.h"
#include "util/string.h"
static enum dom_select_pseudo
get_dom_select_pseudo(struct scanner_token *token)
{
static struct {
struct dom_string string;
enum dom_select_pseudo pseudo;
} pseudo_info[] = {
#define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \
{ INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type }
INIT_DOM_SELECT_PSEUDO_STRING("first-line", FIRST_LINE),
INIT_DOM_SELECT_PSEUDO_STRING("first-letter", FIRST_LETTER),
INIT_DOM_SELECT_PSEUDO_STRING("selection", SELECTION),
INIT_DOM_SELECT_PSEUDO_STRING("after", AFTER),
INIT_DOM_SELECT_PSEUDO_STRING("before", BEFORE),
INIT_DOM_SELECT_PSEUDO_STRING("link", LINK),
INIT_DOM_SELECT_PSEUDO_STRING("visited", VISITED),
INIT_DOM_SELECT_PSEUDO_STRING("active", ACTIVE),
INIT_DOM_SELECT_PSEUDO_STRING("hover", HOVER),
INIT_DOM_SELECT_PSEUDO_STRING("focus", FOCUS),
INIT_DOM_SELECT_PSEUDO_STRING("target", TARGET),
INIT_DOM_SELECT_PSEUDO_STRING("enabled", ENABLED),
INIT_DOM_SELECT_PSEUDO_STRING("disabled", DISABLED),
INIT_DOM_SELECT_PSEUDO_STRING("checked", CHECKED),
INIT_DOM_SELECT_PSEUDO_STRING("indeterminate", INDETERMINATE),
/* Content pseudo-classes: */
INIT_DOM_SELECT_PSEUDO_STRING("contains", CONTAINS),
/* Structural pseudo-classes: */
INIT_DOM_SELECT_PSEUDO_STRING("nth-child", NTH_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("nth-last-child", NTH_LAST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("first-child", FIRST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("last-child", LAST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("only-child", ONLY_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("nth-of-type", NTH_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("nth-last-of-type",NTH_LAST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("first-of-type", FIRST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("last-of-type", LAST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("only-of-type", ONLY_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("root", ROOT),
INIT_DOM_SELECT_PSEUDO_STRING("empty", EMPTY),
#undef INIT_DOM_SELECT_PSEUDO_STRING
};
struct dom_string string;
int i;
set_dom_string(&string, token->string, token->length);
for (i = 0; i < sizeof_array(pseudo_info); i++)
if (!dom_string_casecmp(&pseudo_info[i].string, &string))
return pseudo_info[i].pseudo;
return DOM_SELECT_PSEUDO_UNKNOWN;
}
static enum dom_exception_code
parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
{
struct scanner_token *token = get_scanner_token(scanner);
if (token->type != '[')
return DOM_ERR_INVALID_STATE;
token = get_next_scanner_token(scanner);
if (!token || token->type != CSS_TOKEN_IDENT)
return DOM_ERR_SYNTAX;
set_dom_string(&sel->node.string, token->string, token->length);
token = get_next_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX;
switch (token->type) {
case ']':
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_ANY;
return DOM_ERR_NONE;
case CSS_TOKEN_SELECT_SPACE_LIST:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
break;
case CSS_TOKEN_SELECT_HYPHEN_LIST:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_HYPHEN_LIST;
break;
case CSS_TOKEN_SELECT_BEGIN:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_BEGIN;
break;
case CSS_TOKEN_SELECT_END:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_END;
break;
case CSS_TOKEN_SELECT_CONTAINS:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_CONTAINS;
break;
default:
return DOM_ERR_SYNTAX;
}
token = get_next_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX;
switch (token->type) {
case CSS_TOKEN_IDENT:
case CSS_TOKEN_STRING:
set_dom_string(&sel->node.data.attribute.value, token->string, token->length);
break;
default:
return DOM_ERR_SYNTAX;
}
token = get_next_scanner_token(scanner);
if (token && token->type == ']')
return DOM_ERR_NONE;
return DOM_ERR_SYNTAX;
}
/* Parse:
*
* 0n+1 / 1
* 2n+0 / 2n
* 2n+1
* -0n+2
* -0n+1 / -1
* 1n+0 / n+0 / n
* 0n+0
*/
static enum dom_exception_code
parse_dom_select_nth_numeric(struct dom_select_nth_match *nth,
struct scanner_token *arg)
{
size_t sign = 1;
size_t number;
/* Parse negated value: -0n+1 */
if (arg->string[0] == '-') {
arg->string++, arg->length--;
sign = -1;
if (arg->length == 0)
return DOM_ERR_SYNTAX;
}
/* Parse -n or n */
if (arg->string[0] == 'n') {
nth->step = sign;
arg->string++, arg->length--;
} else if (isdigit(arg->string[0])) {
number = 0;
do {
size_t old_number = number;
number *= 10;
if (old_number > number)
return DOM_ERR_NOT_SUPPORTED;
number += arg->string[0] - '0';
arg->string++, arg->length--;
} while (arg->length > 0 && isdigit(arg->string[0]));
if (arg->length > 0 && arg->string[0] == 'n') {
nth->step = number * sign;
arg->string++, arg->length--;
} else {
nth->step = 0;
nth->index = number * sign;
return DOM_ERR_NONE;
}
}
/* Parse the +... part of n+2 */
if (arg->length <= 1 || arg->string[0] != '+')
return DOM_ERR_NONE;
arg->string++, arg->length--;
/* Accept 2n+ */
if (!isdigit(arg->string[0]))
return DOM_ERR_NONE;
number = 0;
do {
size_t old_number = number;
number *= 10;
if (old_number > number)
return DOM_ERR_NOT_SUPPORTED;
number += arg->string[0] - '0';
arg->string++, arg->length--;
} while (arg->length > 0 && isdigit(arg->string[0]));
nth->index = number * sign;
return DOM_ERR_NONE;
}
/* Parse the parenthesized argument of an :nth-*() pseudo-class into @nth.
 *
 * The next scanner token must be '('. Every token up to the closing ')' is
 * merged into a single pseudo token spanning the whole argument text, which
 * is then checked against the keywords "even" (2n+0) and "odd" (2n+1)
 * before falling back to the numeric parser.
 *
 * Returns DOM_ERR_SYNTAX when the parentheses are missing, empty or
 * unterminated; otherwise DOM_ERR_NONE or the result of the numeric
 * parser. */
static enum dom_exception_code
parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scanner)
{
	struct scanner_token *token = get_next_scanner_token(scanner);
	struct scanner_token arg;

	if (!token || token->type != '(')
		return DOM_ERR_SYNTAX;

	token = get_next_scanner_token(scanner);
	if (!token || token->type == ')')
		return DOM_ERR_SYNTAX;

	copy_struct(&arg, token);

	do {
		/* Combine all the arg material to one token. */
		arg.length = (token->string + token->length) - arg.string;
		token = get_next_scanner_token(scanner);
		if (!token) return DOM_ERR_SYNTAX;
	} while (token->type != ')');

	if (scanner_token_contains(&arg, "even")) {
		nth->step = 2;
		nth->index = 0;

	} else if (scanner_token_contains(&arg, "odd")) {
		nth->step = 2;
		nth->index = 1;

	} else if (arg.length > 0) {
		return parse_dom_select_nth_numeric(nth, &arg);
	}

	return DOM_ERR_NONE;
}
static enum dom_exception_code
parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
struct scanner *scanner)
{
struct scanner_token *token = get_scanner_token(scanner);
enum dom_select_pseudo pseudo;
enum dom_exception_code code;
/* Skip double :'s in front of some pseudo's */
do {
token = get_next_scanner_token(scanner);
} while (token && token->type == ':');
if (!token) return DOM_ERR_SYNTAX;
pseudo = get_dom_select_pseudo(token);
switch (pseudo) {
case DOM_SELECT_PSEUDO_UNKNOWN:
return DOM_ERR_NOT_FOUND;
case DOM_SELECT_PSEUDO_CONTAINS:
/* FIXME: E:contains("text") */
break;
case DOM_SELECT_PSEUDO_NTH_CHILD:
case DOM_SELECT_PSEUDO_NTH_LAST_CHILD:
code = parse_dom_select_nth_arg(&sel->nth_child, scanner);
if (code != DOM_ERR_NONE)
return code;
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
break;
case DOM_SELECT_PSEUDO_FIRST_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, 1);
break;
case DOM_SELECT_PSEUDO_LAST_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, -1);
break;
case DOM_SELECT_PSEUDO_ONLY_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, 0);
break;
case DOM_SELECT_PSEUDO_NTH_TYPE:
case DOM_SELECT_PSEUDO_NTH_LAST_TYPE:
code = parse_dom_select_nth_arg(&sel->nth_type, scanner);
if (code != DOM_ERR_NONE)
return code;
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
break;
case DOM_SELECT_PSEUDO_FIRST_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, 1);
break;
case DOM_SELECT_PSEUDO_LAST_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, -1);
break;
case DOM_SELECT_PSEUDO_ONLY_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, 0);
break;
case DOM_SELECT_PSEUDO_ROOT:
sel->match.element |= DOM_SELECT_ELEMENT_ROOT;
break;
case DOM_SELECT_PSEUDO_EMPTY:
sel->match.element |= DOM_SELECT_ELEMENT_EMPTY;
break;
default:
/* It's a bitflag! */
select->pseudo |= pseudo;
}
return DOM_ERR_NONE;
}
/* Extract the combinator (relation) bits of an element selector node by
 * masking its element match flags with DOM_SELECT_RELATION_FLAGS. A zero
 * result equals DOM_SELECT_RELATION_DESCENDANT, i.e. no explicit combinator
 * has been recorded. */
#define get_element_relation(sel) \
((sel)->match.element & DOM_SELECT_RELATION_FLAGS)
/* Parse the CSS selector text in @string (@length bytes) and build the
 * corresponding tree of selector nodes under @select->selector.
 *
 * Tokens are consumed until the input ends or one of '{', '}', ';' or ','
 * is seen. Fragments are collected in the scratch node `sel`; as soon as it
 * has a node type it is copied to a heap allocated node which is linked to
 * the previously added node (or becomes the root selector) and then serves
 * as parent for the next fragment. Combinators and pseudo-classes only set
 * flags on the scratch node and therefore apply to the next added node.
 *
 * Returns DOM_ERR_NONE when at least one selector node was created,
 * DOM_ERR_SYNTAX or the error of a sub-parser for malformed input and
 * DOM_ERR_INVALID_STATE when nothing was parsed, when allocating a node
 * fails or when a node cannot be added to its parent. Nodes that were
 * already linked below @select->selector are left for the caller to
 * release. */
static enum dom_exception_code
parse_dom_select(struct dom_select *select, unsigned char *string, int length)
{
	/* NOTE(review): the stack is initialized but never used or released
	 * in this function -- confirm whether it can be dropped. */
	struct dom_stack stack;
	struct scanner scanner;
	struct dom_select_node sel;

	init_scanner(&scanner, &css_scanner_info, string, string + length);
	init_dom_stack(&stack, select, NULL, NULL, 0, 1);

	memset(&sel, 0, sizeof(sel));

	while (scanner_has_tokens(&scanner)) {
		struct scanner_token *token = get_scanner_token(&scanner);
		enum dom_exception_code code;
		struct dom_select_node *select_node;

		assert(token);

		/* These tokens end the selector. */
		if (token->type == '{'
		    || token->type == '}'
		    || token->type == ';'
		    || token->type == ',')
			break;

		/* Examine the selector fragment */
		switch (token->type) {
		case CSS_TOKEN_IDENT:
			sel.node.type = DOM_NODE_ELEMENT;
			set_dom_string(&sel.node.string, token->string, token->length);
			if (token->length == 1 && token->string[0] == '*')
				sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
			break;

		case CSS_TOKEN_HASH:
		case CSS_TOKEN_HEX_COLOR:
			/* ID fragment */
			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
			/* Skip the leading '#'. */
			token->string++, token->length--;
			/* Remember the ID itself; previously the text was
			 * dropped so there was nothing to match against. */
			set_dom_string(&sel.node.data.attribute.value, token->string, token->length);
			break;

		case '[':
			sel.node.type = DOM_NODE_ATTRIBUTE;
			code = parse_dom_select_attribute(&sel, &scanner);
			if (code != DOM_ERR_NONE)
				return code;
			break;

		case '.':
			/* The .bar shorthand is stored as [class~="bar"]. */
			token = get_next_scanner_token(&scanner);
			if (!token || token->type != CSS_TOKEN_IDENT)
				return DOM_ERR_SYNTAX;

			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
			set_dom_string(&sel.node.string, "class", -1);
			set_dom_string(&sel.node.data.attribute.value, token->string, token->length);
			break;

		case ':':
			code = parse_dom_select_pseudo(select, &sel, &scanner);
			if (code != DOM_ERR_NONE)
				return code;
			break;

		/* Combinators; at most one may precede a fragment. */
		case '>':
			if (get_element_relation(&sel))
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
			break;

		case '+':
			if (get_element_relation(&sel))
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
			break;

		case '~':
			if (get_element_relation(&sel))
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
			break;

		default:
			return DOM_ERR_SYNTAX;
		}

		skip_scanner_token(&scanner);

		/* Only flags were collected so far; wait for a real node. */
		if (sel.node.type == DOM_NODE_UNKNOWN)
			continue;

		WDBG("Adding %s: %.*s", (sel.node.type == DOM_NODE_ELEMENT) ? "element" : "attr", sel.node.string.length, sel.node.string.string);
		/* FIXME */
		select_node = mem_calloc(1, sizeof(*select_node));
		if (!select_node)
			return DOM_ERR_INVALID_STATE;

		copy_struct(select_node, &sel);

		if (select_node->node.parent) {
			struct dom_node *node = &select_node->node;
			struct dom_node *parent = node->parent;
			struct dom_node_list **list = get_dom_node_list(parent, node);
			int sort = (node->type == DOM_NODE_ATTRIBUTE);
			int index;

			assertm(list, "Adding node to bad parent",
				get_dom_node_type_name(node->type),
				get_dom_node_type_name(parent->type));

			/* Attribute nodes are kept sorted; all others are
			 * added with index -1. */
			index = *list && (*list)->size > 0 && sort
			      ? get_dom_node_map_index(*list, node) : -1;

			if (!add_to_dom_node_list(list, node, index)) {
				done_dom_node(node);
				return DOM_ERR_INVALID_STATE;
			}

		} else {
			/* The very first node becomes the root selector. */
			assert(!select->selector);

			select->selector = select_node;
		}

		/* Start a fresh fragment below the node just added. */
		memset(&sel, 0, sizeof(sel));
		sel.node.parent = &select_node->node;
	}

	if (select->selector)
		return DOM_ERR_NONE;

	WDBG("All has failed ...");
	return DOM_ERR_INVALID_STATE;
}
/* Release the selector tree of @select, if any, and then @select itself.
 *
 * NOTE(review): despite its name this function prints nothing; it destroys
 * @select exactly like done_dom_select() does. Confirm whether this is
 * intended before relying on it for debugging output. */
void
print_dom_select(struct dom_select *select)
{
	struct dom_select_node *selector = select->selector;

	if (selector)
		done_dom_node((struct dom_node *) selector);

	mem_free(select);
}
struct dom_select *
init_dom_select(enum dom_select_syntax syntax,
unsigned char *string, int length)
{
struct dom_select *select = mem_calloc(1, sizeof(select));
enum dom_exception_code code;
code = parse_dom_select(select, string, length);
if (code == DOM_ERR_NONE)
return select;
done_dom_select(select);
return NULL;
}
/* Release @select: first its selector node tree, when there is one, via
 * done_dom_node(), then the dom_select struct itself. */
void
done_dom_select(struct dom_select *select)
{
	struct dom_select_node *selector = select->selector;

	if (selector)
		done_dom_node((struct dom_node *) selector);

	mem_free(select);
}
/* Context shared by the selection callbacks while a DOM tree is being
 * walked by select_dom_nodes(). */
struct dom_select_data {
/* Stack of the selector nodes that are currently candidates for
 * matching. */
struct dom_stack stack;
/* The selector being applied. */
struct dom_select *select;
/* The value returned by select_dom_nodes(). */
struct dom_node_list *list;
};
/* Per-state data kept on the selector stack of struct dom_select_data. */
struct dom_select_state {
/* Compared against the element being popped in
 * dom_select_pop_element() to decide whether the state is removed. */
struct dom_node *node;
};
/* Compare the element type of two element nodes.
 *
 * When both nodes carry the same non-zero element type they are considered
 * equal and zero is returned. Otherwise the result of comparing the node
 * names with dom_string_casecmp() is returned. */
static int
compare_element_type(struct dom_node *node1, struct dom_node *node2)
{
	/* Assuming the same document type */
	if (node1->data.element.type != 0
	    && node1->data.element.type == node2->data.element.type)
		return 0;

	return dom_string_casecmp(&node1->string, &node2->string);
}
/* Find the first child of @selector whose node type is @type.
 *
 * Returns the child as a selector node, or NULL when @selector has no
 * children list or no child of that type. */
static struct dom_select_node *
get_child_dom_select_node(struct dom_select_node *selector,
			  enum dom_node_type type)
{
	struct dom_node_list *children = selector->node.data.element.children;
	size_t pos = 0;

	if (!children)
		return NULL;

	while (is_dom_node_list_member(children, pos)) {
		struct dom_node *child = children->entries[pos];

		if (child && child->type == type)
			return (struct dom_select_node *) child;

		pos++;
	}

	return NULL;
}
/* Non-zero when the attribute match flag(s) @name are set on @selector. */
#define has_attribute_match(selector, name) \
((selector)->match.attribute & (name))
/* Check every attribute selector attached to the element selector @base
 * against the attributes of the element @node.
 *
 * Returns 1 when all attribute selectors are satisfied (or when there are
 * none) and 0 as soon as one of them fails.
 *
 * Only the "any value" and "exact value" operators are implemented. The
 * begin, end, space list, hyphen list and contains operators currently
 * always fail once the attribute has been found. */
static int
match_attribute_selectors(struct dom_select_node *base, struct dom_node *node)
{
struct dom_node_list *attrs = node->data.element.map;
struct dom_node_list *selnodes = base->node.data.element.map;
struct dom_node *selnode;
size_t index;
assert(base->node.type == DOM_NODE_ELEMENT
&& node->type == DOM_NODE_ELEMENT);
/* No attribute selectors means there is nothing that can fail. */
if (!selnodes)
return 1;
/* There are attribute selectors but the element has no attributes. */
if (!attrs)
return 0;
foreach_dom_node (selnodes, selnode, index) {
struct dom_select_node *selector = (void *) selnode;
struct dom_node *attr;
struct dom_string *value;
struct dom_string *selvalue;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ID)) {
/* An ID selector applies to the first attribute flagged as
 * being an ID, whatever its name. */
size_t idindex;
foreach_dom_node (attrs, attr, idindex) {
if (attr->data.attribute.id)
break;
}
/* The loop ran off the end of the list: no ID attribute. */
if (!is_dom_node_list_member(attrs, idindex))
attr = NULL;
} else {
/* Otherwise look the attribute up using the type and name
 * stored in the selector node. */
attr = get_dom_node_map_entry(attrs, DOM_NODE_ATTRIBUTE,
selnode->data.attribute.type,
&selnode->string);
}
if (!attr)
return 0;
/* E[foo]: the presence of the attribute is enough. */
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ANY))
continue;
value = &attr->data.attribute.value;
selvalue = &selnode->data.attribute.value;
/* A value shorter than the wanted text is rejected right away. */
if (value->length < selvalue->length)
return 0;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_EXACT)
&& dom_string_casecmp(value, selvalue))
return 0;
/* The remaining operators have no matching code yet and are
 * treated as a mismatch. */
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_BEGIN))
return 0;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_END))
return 0;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_SPACE_LIST))
return 0;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_HYPHEN_LIST))
return 0;
if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_CONTAINS))
return 0;
}
return 1;
}
/* Non-zero when the element match flag(s) @name are set on @selector. */
#define has_element_match(selector, name) \
((selector)->match.element & (name))
/* Push callback for element nodes of the document being searched.
 *
 * Every selector node on the selector stack (found through @stack->data) is
 * tested against @node. When a selector matches, its first element child
 * selector, if any, is pushed on the selector stack so that it becomes a
 * candidate for the nodes visited next.
 *
 * Combinators are not evaluated yet, and selectors using :nth-child() or
 * :nth-of-type() style matching are never considered a match (FIXME).
 *
 * Always returns @node. */
static struct dom_node *
dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = stack->data;
	struct dom_stack_state *state;
	int pos;

	WDBG("Push element %.*s.", node->string.length, node->string.string);

	foreach_dom_stack_state(&select_data->stack, state, pos) {
		struct dom_select_node *selector = (void *) state->node;

		/* Match the node. */
		if (!has_element_match(selector, DOM_SELECT_ELEMENT_UNIVERSAL)
		    && compare_element_type(&selector->node, node))
			continue;

		switch (get_element_relation(selector)) {
		case DOM_SELECT_RELATION_DIRECT_CHILD:		/* E > F */
			/* node->parent */
			/* Check all states to see if node->parent is there
			 * and for the right reasons. */
			break;

		case DOM_SELECT_RELATION_DIRECT_ADJACENT:	/* E + F */
			/* Get preceding node to see if it is on the stack. */
			break;

		case DOM_SELECT_RELATION_INDIRECT_ADJACENT:	/* E ~ F */
			/* Check all states with same depth? */
			break;

		case DOM_SELECT_RELATION_DESCENDANT:		/* E F */
		default:
			break;
		}

		/* Roots don't have parent nodes. */
		if (has_element_match(selector, DOM_SELECT_ELEMENT_ROOT)
		    && node->parent)
			continue;

		/* :empty is about child nodes (text included), not about
		 * attributes, so look at the children list. The list may
		 * not exist at all for an element without children. */
		if (has_element_match(selector, DOM_SELECT_ELEMENT_EMPTY)
		    && node->data.element.children
		    && node->data.element.children->size > 0)
			continue;

		if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_CHILD)) {
			/* FIXME */
			continue;
		}

		if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_TYPE)) {
			/* FIXME */
			continue;
		}

		/* Check attribute selectors. */
		if (selector->node.data.element.map
		    && !match_attribute_selectors(selector, node))
			continue;

		WDBG("Matched element: %.*s.", node->string.length, node->string.string);
		/* This node is matched, so push the next selector node to
		 * match on the stack. */
		selector = get_child_dom_select_node(selector, DOM_NODE_ELEMENT);
		if (selector)
			push_dom_node(&select_data->stack, &selector->node);
	}

	return node;
}
/* Pop callback for element nodes of the document being searched.
 *
 * Walks the selector stack from the top and pops every state whose
 * associated node is the element @node being left. Popping states based on
 * their combinator is not implemented yet.
 *
 * Always returns @node. */
static struct dom_node *
dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = stack->data;
	struct dom_stack *select_stack = &select_data->stack;
	struct dom_stack_state *state;
	int pos;

	WDBG("Pop element: %.*s", node->string.length, node->string.string);

	foreachback_dom_stack_state (select_stack, state, pos) {
		struct dom_select_node *selector = (void *) state->node;
		struct dom_select_state *state_data
			= get_dom_stack_state_data(select_stack, state);

		if (state_data->node == node) {
			pop_dom_state(select_stack, state);
			WDBG("Remove element.");
			continue;
		}

		/* Pop states that no longer lives up to a relation. */
		switch (get_element_relation(selector)) {
		case DOM_SELECT_RELATION_DIRECT_CHILD:		/* E > F */
		case DOM_SELECT_RELATION_DIRECT_ADJACENT:	/* E + F */
		case DOM_SELECT_RELATION_INDIRECT_ADJACENT:	/* E ~ F */
		case DOM_SELECT_RELATION_DESCENDANT:		/* E F */
		default:
			/* Nothing is done for any relation yet. */
			break;
		}
	}

	return node;
}
/* Push callback for text, CDATA section and entity reference nodes.
 *
 * Looks for a text child selector below the selector node on top of the
 * selector stack. Text matching itself is not implemented yet; the only
 * visible effect is an error report when @node has an unexpected type.
 *
 * Always returns @node. */
static struct dom_node *
dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = stack->data;
	struct dom_stack_state *top = get_dom_stack_top(&select_data->stack);
	struct dom_select_node *selector = (void *) top->node;
	struct dom_select_node *text_sel;
	struct dom_string *text;

	text_sel = get_child_dom_select_node(selector, DOM_NODE_TEXT);

	WDBG("Text node: %d chars", node->string.length);

	if (!text_sel)
		return node;

	/* The text to look for; not used for any comparison yet. */
	text = &text_sel->node.string;

	if (node->type != DOM_NODE_TEXT
	    && node->type != DOM_NODE_CDATA_SECTION
	    && node->type != DOM_NODE_ENTITY_REFERENCE) {
		ERROR("Unhandled type");
	}

	return node;
}
/* Callbacks, one slot per DOM node type, passed as the push callbacks of
 * the document stack in select_dom_nodes(). Only element nodes and the
 * text-like node types have a handler. The first, unnamed slot has no
 * handler either (presumably the unknown node type -- TODO confirm). */
dom_stack_callback_T dom_select_push_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_select_push_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_select_push_text,
/* DOM_NODE_CDATA_SECTION */ dom_select_push_text,
/* DOM_NODE_ENTITY_REFERENCE */ dom_select_push_text,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
};
/* Callbacks, one slot per DOM node type, passed as the pop callbacks of the
 * document stack in select_dom_nodes(). Only element nodes have a
 * handler. */
dom_stack_callback_T dom_select_pop_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_select_pop_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
};
/* Apply @select to the DOM tree rooted at @root.
 *
 * Two stacks are used: one that walks the document and fires the selection
 * callbacks, and one (inside the selection data) that holds the selector
 * nodes currently up for matching. The root selector is pushed first and
 * marked immutable before the walk starts.
 *
 * Returns the node list stored in the selection data. */
struct dom_node_list *
select_dom_nodes(struct dom_select *select, struct dom_node *root)
{
	struct dom_select_data select_data;
	struct dom_stack stack;

	memset(&select_data, 0, sizeof(select_data));
	select_data.select = select;

	/* The document walker. */
	init_dom_stack(&stack, &select_data,
		       dom_select_push_callbacks,
		       dom_select_pop_callbacks, 0, 1);

	/* The selector stack; each state carries a dom_select_state. */
	init_dom_stack(&select_data.stack, &select_data, NULL, NULL,
		       sizeof(struct dom_select_state), 1);

	if (push_dom_node(&select_data.stack, &select->selector->node)) {
		get_dom_stack_top(&select_data.stack)->immutable = 1;
		walk_dom_nodes(&stack, root);
	}

	done_dom_stack(&select_data.stack);
	done_dom_stack(&stack);

	return select_data.list;
}

357
src/document/dom/select.h Normal file
View File

@ -0,0 +1,357 @@
#ifndef EL__DOCUMENT_DOM_SELECT_H
#define EL__DOCUMENT_DOM_SELECT_H
#include "document/dom/node.h"
/* FIXME: Namespaces; *|E */
/* Flags describing how an element selector node is matched. The lowest
 * bits hold the combinator; the remaining bits hold element tests. */
enum dom_select_element_match {
/* Gives info about the relation required between two element nodes for
 * them to match. These are also referred to as combinators. */
/* The following are mutually exclusive.
 * DOM_SELECT_RELATION_DESCENDANT is the default. */
/* Matches any F descendant of E: E F */
/* Bogus flag; it has the value zero, which is an easy way to have a
 * default. */
DOM_SELECT_RELATION_DESCENDANT = 0,
/* Matches F being a direct child of E: E > F */
DOM_SELECT_RELATION_DIRECT_CHILD = 1,
/* Matches F immediately preceded by E: E + F */
DOM_SELECT_RELATION_DIRECT_ADJACENT = 2,
/* Matches F preceded by E: E ~ F */
DOM_SELECT_RELATION_INDIRECT_ADJACENT = 4,
/* Mask covering all of the relation flags above. */
DOM_SELECT_RELATION_FLAGS = DOM_SELECT_RELATION_DESCENDANT
| DOM_SELECT_RELATION_DIRECT_CHILD
| DOM_SELECT_RELATION_DIRECT_ADJACENT
| DOM_SELECT_RELATION_INDIRECT_ADJACENT,
/* None of the following are mutually exclusive. They can co-exist
 * although combining them might not make a lot of sense. */
/* Matches any element: * */
DOM_SELECT_ELEMENT_UNIVERSAL = 8,
/* Matches the root node of the document: :root or // */
DOM_SELECT_ELEMENT_ROOT = 16,
/* Matches the empty element (not even text): :empty */
DOM_SELECT_ELEMENT_EMPTY = 32,
/* Matches some n-th child of its parent: :nth-child(n), etc. */
DOM_SELECT_ELEMENT_NTH_CHILD = 64,
/* Matches some n-th sibling of its type: :nth-of-type(n), etc. */
DOM_SELECT_ELEMENT_NTH_TYPE = 128,
};
/* Flags describing how an attribute selector node is matched.
 *
 * The special CSS .bar class attribute syntax is represented as a space
 * list match on the class attribute, E[class~="bar"]. The ID flag will
 * match against any attribute with its boolean id member set.
 * XXX: These flags are ATM mutually exclusive. */
enum dom_select_attribute_match {
/* Matches any set value: E[foo] */
DOM_SELECT_ATTRIBUTE_ANY = 1,
/* Matches exact value "bar": E[foo="bar"] */
DOM_SELECT_ATTRIBUTE_EXACT = 2,
/* Matches space separated list "z bar bee": E[foo~="bar"] */
DOM_SELECT_ATTRIBUTE_SPACE_LIST = 4,
/* Matches hyphen separated list "z-bar-bee": E[foo|="bar"] */
DOM_SELECT_ATTRIBUTE_HYPHEN_LIST = 8,
/* Matches value beginning; "bar-z-bee": E[foo^="bar"] */
DOM_SELECT_ATTRIBUTE_BEGIN = 16,
/* Matches value ending; "z-bee-bar": E[foo$="bar"] */
DOM_SELECT_ATTRIBUTE_END = 32,
/* Matches value containing; "m33p/bar\++": E[foo*="bar"] */
DOM_SELECT_ATTRIBUTE_CONTAINS = 64,
/* Matches exact ID attribute value "bar": #bar */
DOM_SELECT_ATTRIBUTE_ID = 128,
};
/* Flags describing how a text selector node is matched. Info about text
 * matching is stored in a DOM text node. */
enum dom_select_text_match {
/* Matches E containing substring "foo": E:contains("foo") */
DOM_SELECT_TEXT_CONTAINS = 1,
};
/* Info about what nth child or type to match. The basic syntax is:
 *
 * <step>n<index>
 *
 * with a little syntactic sugar.
 *
 * Examples:
 *
 * 0n+1 / 1 is first child (same as :first-child)
 * 2n+0 / 2n / even is all even children
 * 2n+1 / odd is all odd children
 * -0n+2 is the last two children
 * -0n+1 / -1 is last child (same as :last-child)
 * 1n+0 / n+0 / n is all elements of type
 * 0n+0 is only element of type (a special internal syntax
 * used when storing nth-info)
 *
 * That is, a zero step (0n) means exact indexing, and a non-zero step
 * means stepwise indexing.
 *
 * Note that both members are unsigned, so the negative values used above
 * (such as the -1 index of :last-child) are stored in wrapped-around
 * form.
 */
struct dom_select_nth_match {
/* The <step> part; zero for exact indexing. */
size_t step;
/* The <index> part. */
size_t index;
};
/* Set both members of the struct dom_select_nth_match pointed to by @nth.
 * Expands to a single statement. */
#define set_dom_select_nth_match(nth, nthstep, nthindex) \
do { (nth)->step = (nthstep); (nth)->index = (nthindex); } while(0)
/* This is supposed to be a simple selector. However, this struct is also used
 * for holding data for attribute matching and element text matching. */
struct dom_select_node {
/* This holds the DOM node which has data about the node being matched.
 * It can be either an element, an attribute, or a text node. */
/* XXX: Keep at the top. Pointers to this member are cast back and forth
 * between dom_node and dom_select_node. */
struct dom_node node;
/* Only meaningful for element nodes. */
/* FIXME: Don't waste memory for non-element nodes. */
/* Used when DOM_SELECT_ELEMENT_NTH_CHILD is set. */
struct dom_select_nth_match nth_child;
/* Used when DOM_SELECT_ELEMENT_NTH_TYPE is set. */
struct dom_select_nth_match nth_type;
/* Flags, specifying how the matching should be done. Which member is
 * in use depends on the type of the node above. */
union {
enum dom_select_element_match element;
enum dom_select_attribute_match attribute;
enum dom_select_text_match text;
} match;
};
/* The pseudo-classes and pseudo-elements known to the selector parser. The
 * first group consists of bit flags that can be combined in the pseudo
 * member of struct dom_select; the second group is a plain enumeration
 * used only while parsing. */
enum dom_select_pseudo {
/* Returned for names that are not recognized. */
DOM_SELECT_PSEUDO_UNKNOWN = 0,
/* Pseudo-elements: */
/* Matches first formatted line: ::first-line */
DOM_SELECT_PSEUDO_FIRST_LINE = 1,
/* Matches first formatted letter: ::first-letter */
DOM_SELECT_PSEUDO_FIRST_LETTER = 2,
/* Matches text selected by user: ::selection */
DOM_SELECT_PSEUDO_SELECTION = 4,
/* Matches generated content after an element: ::after */
DOM_SELECT_PSEUDO_AFTER = 8,
/* Matches generated content before an element: ::before */
DOM_SELECT_PSEUDO_BEFORE = 16,
/* Pseudo-attributes: */
/* Link pseudo-classes: */
DOM_SELECT_PSEUDO_LINK = 32, /* :link */
DOM_SELECT_PSEUDO_VISITED = 64, /* :visited */
/* User action pseudo-classes: */
DOM_SELECT_PSEUDO_ACTIVE = 128, /* :active */
DOM_SELECT_PSEUDO_HOVER = 256, /* :hover */
DOM_SELECT_PSEUDO_FOCUS = 512, /* :focus */
/* Target pseudo-class: */
DOM_SELECT_PSEUDO_TARGET = 1024, /* :target */
/* UI element states pseudo-classes: */
DOM_SELECT_PSEUDO_ENABLED = 2048, /* :enabled */
DOM_SELECT_PSEUDO_DISABLED = 4096, /* :disabled */
DOM_SELECT_PSEUDO_CHECKED = 8192, /* :checked */
DOM_SELECT_PSEUDO_INDETERMINATE = 16384, /* :indeterminate */
/* XXX: The following pseudo-classes are not kept in the pseudo member
 * of the dom_select struct, so they do not need to be bit flags and are
 * simply numbered consecutively. They are mostly for parsing
 * purposes. */
DOM_SELECT_PSEUDO_CONTAINS = 10000,
DOM_SELECT_PSEUDO_NTH_CHILD,
DOM_SELECT_PSEUDO_NTH_LAST_CHILD,
DOM_SELECT_PSEUDO_FIRST_CHILD,
DOM_SELECT_PSEUDO_LAST_CHILD,
DOM_SELECT_PSEUDO_ONLY_CHILD,
DOM_SELECT_PSEUDO_NTH_TYPE,
DOM_SELECT_PSEUDO_NTH_LAST_TYPE,
DOM_SELECT_PSEUDO_FIRST_TYPE,
DOM_SELECT_PSEUDO_LAST_TYPE,
DOM_SELECT_PSEUDO_ONLY_TYPE,
DOM_SELECT_PSEUDO_ROOT,
DOM_SELECT_PSEUDO_EMPTY,
};
/* A parsed selector. */
struct dom_select {
/* The root of the tree of selector nodes. */
struct dom_select_node *selector;
/* NOTE(review): not written by the parser in select.c as shown --
 * presumably reserved for CSS specificity; confirm. */
unsigned long specificity;
/* The bit flag pseudo-classes and pseudo-elements seen while parsing,
 * OR-ed together. */
enum dom_select_pseudo pseudo;
};
/* The selector syntaxes that can be requested from init_dom_select(). */
enum dom_select_syntax {
DOM_SELECT_SYNTAX_CSS, /* Example: 'p a[id=node] a:hover' */
DOM_SELECT_SYNTAX_PATH, /* Example: '//rss/channel/item' */
};
struct dom_select *init_dom_select(enum dom_select_syntax syntax,
unsigned char *string, int length);
void done_dom_select(struct dom_select *select);
/* Applies the selector @select to the DOM tree given by @root and returns the
 * result as a DOM node list.
 * NOTE(review): ownership of the returned list and the value returned when
 * nothing matches are not visible from this header -- confirm in select.c. */
struct dom_node_list *
select_dom_nodes(struct dom_select *select, struct dom_node *root);
/*
* +------------------------------------------------------------------------------------+
* | Pattern | Meaning | Type | Version |
* |-----------------------+------------------------------+-------------------+---------|
* | * | any element | Universal | 2 |
* | | | selector | |
* |-----------------------+------------------------------+-------------------+---------|
* | E | an element of type E | Type selector | 1 |
* |-----------------------+------------------------------+-------------------+---------|
* | E F | an F element descendant of | Descendant | 1 |
* | | an E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E > F | an F element child of an E | Child combinator | 2 |
* | | element | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E + F | an F element immediately | Direct adjacent | 2 |
* | | preceded by an E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E ~ F | an F element preceded by an | Indirect adjacent | 3 |
* | | E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:root | an E element, root of the | Structural | 3 |
* | | document | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element that has no | Structural | |
* | E:empty | children (including text | pseudo-classes | 3 |
* | | nodes) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:first-child | an E element, first child of | Structural | 2 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:last-child | an E element, last child of | Structural | 3 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:nth-child(n) | an E element, the n-th child | Structural | 3 |
* | | of its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element, the n-th child | Structural | |
* | E:nth-last-child(n) | of its parent, counting from | pseudo-classes | 3 |
* | | the last one | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:first-of-type | an E element, first sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:last-of-type | an E element, last sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:nth-of-type(n) | an E element, the n-th | Structural | 3 |
* | | sibling of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element, the n-th | Structural | |
* | E:nth-last-of-type(n) | sibling of its type, | pseudo-classes | 3 |
* | | counting from the last one | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:only-child | an E element, only child of | Structural | 3 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:only-of-type | an E element, only sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element being the | | |
* | E:link | source anchor of a hyperlink | The link | |
* | E:visited | of which the target is not | pseudo-classes | 1 |
* | | yet visited (:link) or | | |
* | | already visited (:visited) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:active | an E element during certain | The user action | |
* | E:hover | user actions | pseudo-classes | 1 and 2 |
* | E:focus | | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:target | an E element being the | The target | 3 |
* | | target of the referring URI | pseudo-class | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an element of type E in | | |
* | E:lang(fr) | language "fr" (the document | The :lang() | 2 |
* | FIXME | language specifies how | pseudo-class | |
* | | language is determined) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:enabled | a user interface element E | The UI element | |
* | E:disabled | which is enabled or disabled | states | 3 |
* | | | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | a user interface element E | | |
* | E:checked | which is checked or in an | The UI element | |
* | E:indeterminate | indeterminate state (for | states | 3 |
* | | instance a radio-button or | pseudo-classes | |
* | | checkbox) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element containing the | Content | |
* | E:contains("foo") | substring "foo" in its | pseudo-class | 3 |
* | | textual contents | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::first-line | the first formatted line of | The :first-line | 1 |
* | | an E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::first-letter | the first formatted letter | The :first-letter | 1 |
* | | of an E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | | the portion of an E element | The UI element | |
* | E::selection | that is currently | fragments | 3 |
* | | selected/highlighted by the | pseudo-elements | |
* | | user | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::before | generated content before an | The :before | 2 |
* | | E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::after | generated content after an E | The :after | 2 |
* | | element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose class is | | |
* | E.warning | "warning" (the document | Class selectors | 1 |
* | | language specifies how class | | |
* | | is determined). | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E#myid | an E element with ID equal | ID selectors | 1 |
* | | to "myid". | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E[foo] | an E element with a "foo" | Attribute | 2 |
* | | attribute | selectors | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo="bar"] | attribute value is exactly | selectors | 2 |
* | | equal to "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | | |
* | | attribute value is a list of | Attribute | |
* | E[foo~="bar"] | space-separated values, one | selectors | 2 |
* | | of which is exactly equal to | | |
* | | "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | | |
* | E[foo^="bar"] | attribute value begins | Attribute | 3 |
* | | exactly with the string | selectors | |
* | | "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo$="bar"] | attribute value ends exactly | selectors | 3 |
* | | with the string "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo*="bar"] | attribute value contains the | selectors | 3 |
* | | substring "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose | | |
* | | "hreflang" attribute has a | Attribute | |
* | E[hreflang|="en"] | hyphen-separated list of | selectors | 2 |
* | | values beginning (from the | | |
* | | left) with "en" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:not(s) | an E element that does not | Negation | 3 |
* | FIXME | match simple selector s | pseudo-class | |
* +------------------------------------------------------------------------------------+
*/
#endif