1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-11-04 08:17:17 -05:00
elinks/src/dom/select.c

1083 lines
28 KiB
C

/* DOM node selection */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "elinks.h"
#include "dom/css/scanner.h"
#include "dom/dom.h"
#include "dom/node.h"
#include "dom/scanner.h"
#include "dom/select.h"
#include "dom/stack.h"
#include "dom/string.h"
#include "util/memory.h"
/* Selector parsing: */
/* Maps the content of a scanner token to a pseudo-class or -element ID. */
static enum dom_select_pseudo
get_dom_select_pseudo(struct dom_scanner_token *token)
{
static struct {
struct dom_string string;
enum dom_select_pseudo pseudo;
} pseudo_info[] = {
#define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \
{ INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type }
INIT_DOM_SELECT_PSEUDO_STRING("first-line", FIRST_LINE),
INIT_DOM_SELECT_PSEUDO_STRING("first-letter", FIRST_LETTER),
INIT_DOM_SELECT_PSEUDO_STRING("selection", SELECTION),
INIT_DOM_SELECT_PSEUDO_STRING("after", AFTER),
INIT_DOM_SELECT_PSEUDO_STRING("before", BEFORE),
INIT_DOM_SELECT_PSEUDO_STRING("link", LINK),
INIT_DOM_SELECT_PSEUDO_STRING("visited", VISITED),
INIT_DOM_SELECT_PSEUDO_STRING("active", ACTIVE),
INIT_DOM_SELECT_PSEUDO_STRING("hover", HOVER),
INIT_DOM_SELECT_PSEUDO_STRING("focus", FOCUS),
INIT_DOM_SELECT_PSEUDO_STRING("target", TARGET),
INIT_DOM_SELECT_PSEUDO_STRING("enabled", ENABLED),
INIT_DOM_SELECT_PSEUDO_STRING("disabled", DISABLED),
INIT_DOM_SELECT_PSEUDO_STRING("checked", CHECKED),
INIT_DOM_SELECT_PSEUDO_STRING("indeterminate", INDETERMINATE),
/* Content pseudo-classes: */
INIT_DOM_SELECT_PSEUDO_STRING("contains", CONTAINS),
/* Structural pseudo-classes: */
INIT_DOM_SELECT_PSEUDO_STRING("nth-child", NTH_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("nth-last-child", NTH_LAST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("first-child", FIRST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("last-child", LAST_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("only-child", ONLY_CHILD),
INIT_DOM_SELECT_PSEUDO_STRING("nth-of-type", NTH_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("nth-last-of-type",NTH_LAST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("first-of-type", FIRST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("last-of-type", LAST_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("only-of-type", ONLY_TYPE),
INIT_DOM_SELECT_PSEUDO_STRING("root", ROOT),
INIT_DOM_SELECT_PSEUDO_STRING("empty", EMPTY),
#undef INIT_DOM_SELECT_PSEUDO_STRING
};
int i;
for (i = 0; i < sizeof_array(pseudo_info); i++)
if (!dom_string_casecmp(&pseudo_info[i].string, &token->string))
return pseudo_info[i].pseudo;
return DOM_SELECT_PSEUDO_UNKNOWN;
}
/* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
static enum dom_exception_code
parse_dom_select_attribute(struct dom_select_node *sel, struct dom_scanner *scanner)
{
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
/* Get '['. */
if (token->type != '[')
return DOM_ERR_INVALID_STATE;
/* Get the attribute name. */
token = get_next_dom_scanner_token(scanner);
if (!token || token->type != CSS_TOKEN_IDENT)
return DOM_ERR_SYNTAX;
copy_dom_string(&sel->node.string, &token->string);
/* Get the optional '=' combo or ending ']'. */
token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX;
switch (token->type) {
case ']':
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_ANY;
return DOM_ERR_NONE;
case CSS_TOKEN_SELECT_SPACE_LIST:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
break;
case CSS_TOKEN_SELECT_HYPHEN_LIST:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_HYPHEN_LIST;
break;
case CSS_TOKEN_SELECT_BEGIN:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_BEGIN;
break;
case CSS_TOKEN_SELECT_END:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_END;
break;
case CSS_TOKEN_SELECT_CONTAINS:
sel->match.attribute |= DOM_SELECT_ATTRIBUTE_CONTAINS;
break;
default:
return DOM_ERR_SYNTAX;
}
/* Get the required value. */
token = get_next_dom_scanner_token(scanner);
if (!token) return DOM_ERR_SYNTAX;
switch (token->type) {
case CSS_TOKEN_IDENT:
case CSS_TOKEN_STRING:
copy_dom_string(&sel->node.data.attribute.value, &token->string);
break;
default:
return DOM_ERR_SYNTAX;
}
/* Get the ending ']'. */
token = get_next_dom_scanner_token(scanner);
if (token && token->type == ']')
return DOM_ERR_NONE;
return DOM_ERR_SYNTAX;
}
/* Parse:
*
* 0n+1 / 1
* 2n+0 / 2n
* 2n+1
* -0n+2
* -0n+1 / -1
* 1n+0 / n+0 / n
* 0n+0
*/
/* FIXME: Move somewhere else? dom/scanner.h? */
/* Convert the leading decimal digits of @token to a number. The digits are
 * consumed from the token as they are read.
 *
 * Returns the parsed value (0 when the token does not start with a digit)
 * or (size_t) -1 when the value is too big to be represented. */
static size_t
get_scanner_token_number(struct dom_scanner_token *token)
{
	/* Doubles as the error return value, so it is never a valid result. */
	const size_t too_big = (size_t) -1;
	size_t number = 0;

	while (token->string.length > 0
	       && isdigit((unsigned char) token->string.string[0])) {
		size_t digit = token->string.string[0] - '0';

		/* -E2BIG: Check before multiplying. Comparing the value
		 * before and after a wrapped multiplication does not catch
		 * every overflow, and the addition has to be covered too. */
		if (number > (too_big - 1 - digit) / 10)
			return too_big;

		number = number * 10 + digit;
		skip_dom_scanner_token_char(token);
	}

	return number;
}
/* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'.
 *
 * Accepted arguments are 'even', 'odd', an optionally negated number, or
 * the '<a>n' / '<a>n+<b>' form where <a> may be negated or left out. On
 * success the step (<a>) and index (<b>) are stored in @nth.
 *
 * Returns DOM_ERR_NONE on success, DOM_ERR_SYNTAX for malformed arguments
 * and DOM_ERR_INVALID_STATE when a number is reported as too big. */
static enum dom_exception_code
parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct dom_scanner *scanner)
{
	struct dom_scanner_token *token = get_next_dom_scanner_token(scanner);
	int sign = 1;
	/* -1 means that no number has been seen in front of 'n'. */
	int number = -1;

	if (!token || token->type != '(')
		return DOM_ERR_SYNTAX;

	token = get_next_dom_scanner_token(scanner);
	if (!token)
		return DOM_ERR_SYNTAX;

	switch (token->type) {
	case CSS_TOKEN_IDENT:
		if (dom_scanner_token_contains(token, "even")) {
			nth->step = 2;
			nth->index = 0;

		} else if (dom_scanner_token_contains(token, "odd")) {
			nth->step = 2;
			nth->index = 1;

		} else {
			/* Check for 'n' ident below. */
			break;
		}

		if (skip_css_tokens(scanner, ')'))
			return DOM_ERR_NONE;

		return DOM_ERR_SYNTAX;

	case '-':
		sign = -1;

		token = get_next_dom_scanner_token(scanner);
		if (!token) return DOM_ERR_SYNTAX;

		/* '-n...': leave the ident to the 'n' check below. The test
		 * used to be inverted, which rejected '-n' as a syntax error
		 * and made '-<number>' skip the number parsing. */
		if (token->type == CSS_TOKEN_IDENT)
			break;

		if (token->type != CSS_TOKEN_NUMBER)
			return DOM_ERR_SYNTAX;
		/* Fall-through */

	case CSS_TOKEN_NUMBER:
		number = get_scanner_token_number(token);
		if (number < 0)
			return DOM_ERR_INVALID_STATE;

		token = get_next_dom_scanner_token(scanner);
		if (!token) return DOM_ERR_SYNTAX;
		break;

	default:
		return DOM_ERR_SYNTAX;
	}

	/* The rest can contain n+ part */
	switch (token->type) {
	case CSS_TOKEN_IDENT:
		if (!dom_scanner_token_contains(token, "n"))
			return DOM_ERR_SYNTAX;

		/* A left out coefficient means 1, so that 'n' is '1n' and
		 * '-n' is '-1n'. */
		nth->step = sign * (number < 0 ? 1 : number);
		/* No '+<b>' part means an index of zero. */
		nth->index = 0;

		token = get_next_dom_scanner_token(scanner);
		if (!token) return DOM_ERR_SYNTAX;

		if (token->type != '+')
			break;

		token = get_next_dom_scanner_token(scanner);
		if (!token) return DOM_ERR_SYNTAX;

		if (token->type != CSS_TOKEN_NUMBER)
			break;

		number = get_scanner_token_number(token);
		if (number < 0)
			return DOM_ERR_INVALID_STATE;

		/* The leading sign belongs to the step only; what follows
		 * the '+' is a positive offset. */
		nth->index = number;
		break;

	default:
		/* A plain number: no step, only an index. */
		nth->step = 0;
		nth->index = sign * number;
	}

	if (skip_css_tokens(scanner, ')'))
		return DOM_ERR_NONE;

	return DOM_ERR_SYNTAX;
}
/* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
static enum dom_exception_code
parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
struct dom_scanner *scanner)
{
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
enum dom_select_pseudo pseudo;
enum dom_exception_code code;
/* Skip double :'s in front of some pseudo's (::first-line, etc.) */
do {
token = get_next_dom_scanner_token(scanner);
} while (token && token->type == ':');
if (!token || token->type != CSS_TOKEN_IDENT)
return DOM_ERR_SYNTAX;
pseudo = get_dom_select_pseudo(token);
switch (pseudo) {
case DOM_SELECT_PSEUDO_UNKNOWN:
return DOM_ERR_NOT_FOUND;
case DOM_SELECT_PSEUDO_CONTAINS:
/* FIXME: E:contains("text") */
break;
case DOM_SELECT_PSEUDO_NTH_CHILD:
case DOM_SELECT_PSEUDO_NTH_LAST_CHILD:
code = parse_dom_select_nth_arg(&sel->nth_child, scanner);
if (code != DOM_ERR_NONE)
return code;
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
break;
case DOM_SELECT_PSEUDO_FIRST_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, 1);
break;
case DOM_SELECT_PSEUDO_LAST_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, -1);
break;
case DOM_SELECT_PSEUDO_ONLY_CHILD:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
set_dom_select_nth_match(&sel->nth_child, 0, 0);
break;
case DOM_SELECT_PSEUDO_NTH_TYPE:
case DOM_SELECT_PSEUDO_NTH_LAST_TYPE:
code = parse_dom_select_nth_arg(&sel->nth_type, scanner);
if (code != DOM_ERR_NONE)
return code;
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
break;
case DOM_SELECT_PSEUDO_FIRST_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, 1);
break;
case DOM_SELECT_PSEUDO_LAST_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, -1);
break;
case DOM_SELECT_PSEUDO_ONLY_TYPE:
sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
set_dom_select_nth_match(&sel->nth_type, 0, 0);
break;
case DOM_SELECT_PSEUDO_ROOT:
sel->match.element |= DOM_SELECT_ELEMENT_ROOT;
break;
case DOM_SELECT_PSEUDO_EMPTY:
sel->match.element |= DOM_SELECT_ELEMENT_EMPTY;
break;
default:
/* It's a bitflag! */
select->pseudo |= pseudo;
}
return DOM_ERR_NONE;
}
/* The element relation flags are mutually exclusive. This macro masks the
* relation bits out of a selector node's element match flags, so it can be
* used for checking whether any relation flag is set. */
#define get_element_relation(sel) \
((sel)->match.element & DOM_SELECT_RELATION_FLAGS)
/* Parse a CSS3 selector and add selector nodes to the @select struct.
 *
 * Tokens from @string are consumed until the scanner runs out or one of
 * '{', '}', ';' or ',' is reached. Selector fragments are collected in a
 * scratch selector node. Once a fragment has given the scratch node a node
 * type, it is copied to a newly allocated node that is either installed as
 * @select->selector (when @stack is empty) or added to the node on top of
 * @stack. Element selector nodes stay on @stack so later nodes are added
 * below them; nodes of other types are popped again right away.
 *
 * Returns DOM_ERR_NONE if a selector was created, DOM_ERR_SYNTAX for
 * malformed input, and DOM_ERR_INVALID_STATE if no selector was created or
 * if an allocation, list insertion or stack push failed. Errors from the
 * attribute and pseudo parsers are passed on unchanged. */
static enum dom_exception_code
parse_dom_select(struct dom_select *select, struct dom_stack *stack,
		 struct dom_string *string)
{
	struct dom_scanner scanner;
	struct dom_select_node sel;

	init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0);

	memset(&sel, 0, sizeof(sel));

	while (dom_scanner_has_tokens(&scanner)) {
		struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
		enum dom_exception_code code;
		struct dom_select_node *select_node;

		assert(token);

		/* These tokens end the selector. */
		if (token->type == '{'
		    || token->type == '}'
		    || token->type == ';'
		    || token->type == ',')
			break;

		/* Examine the selector fragment */

		switch (token->type) {
		case CSS_TOKEN_IDENT:
			sel.node.type = DOM_NODE_ELEMENT;
			copy_dom_string(&sel.node.string, &token->string);
			if (dom_scanner_token_contains(token, "*"))
				sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
			break;

		case CSS_TOKEN_HASH:
		case CSS_TOKEN_HEX_COLOR:
			/* ID fragment */
			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
			/* Skip the leading '#'. */
			skip_dom_scanner_token_char(token);
			/* Remember the ID to match. It used to be dropped,
			 * leaving the selector node without any value. */
			copy_dom_string(&sel.node.data.attribute.value, &token->string);
			break;

		case '[':
			sel.node.type = DOM_NODE_ATTRIBUTE;
			code = parse_dom_select_attribute(&sel, &scanner);
			if (code != DOM_ERR_NONE)
				return code;
			break;

		case '.':
			/* '.foo' is handled as '[class~="foo"]'. */
			token = get_next_dom_scanner_token(&scanner);
			if (!token || token->type != CSS_TOKEN_IDENT)
				return DOM_ERR_SYNTAX;

			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
			set_dom_string(&sel.node.string, "class", -1);
			copy_dom_string(&sel.node.data.attribute.value, &token->string);
			break;

		case ':':
			code = parse_dom_select_pseudo(select, &sel, &scanner);
			if (code != DOM_ERR_NONE)
				return code;
			break;

		/* Combinators: only one relation may be set for the selector
		 * node that is being collected. */

		case '>':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
			break;

		case '+':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
			break;

		case '~':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_ERR_SYNTAX;
			sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
			break;

		default:
			return DOM_ERR_SYNTAX;
		}

		skip_dom_scanner_token(&scanner);

		/* Fragments that only set flags (combinators, pseudos) do not
		 * produce a node on their own; keep collecting. */
		if (sel.node.type == DOM_NODE_UNKNOWN)
			continue;

		select_node = mem_calloc(1, sizeof(*select_node));
		if (!select_node)
			return DOM_ERR_INVALID_STATE;

		copy_struct(select_node, &sel);

		if (!dom_stack_is_empty(stack)) {
			struct dom_node *node = &select_node->node;
			struct dom_node *parent = get_dom_stack_top(stack)->node;
			struct dom_node_list **list = get_dom_node_list(parent, node);
			int sort = (node->type == DOM_NODE_ATTRIBUTE);
			int index;

			assertm(list, "Adding node to bad parent [%d -> %d]",
				node->type, parent->type);

			/* Attribute nodes are inserted at their map index;
			 * everything else is added with index -1. */
			index = *list && (*list)->size > 0 && sort
			      ? get_dom_node_map_index(*list, node) : -1;

			if (!add_to_dom_node_list(list, node, index)) {
				done_dom_node(node);
				return DOM_ERR_INVALID_STATE;
			}

			node->parent = parent;

		} else {
			assert(!select->selector);

			select->selector = select_node;
		}

		if (!push_dom_node(stack, &select_node->node))
			return DOM_ERR_INVALID_STATE;

		/* Only element nodes stay on the stack. */
		if (select_node->node.type != DOM_NODE_ELEMENT)
			pop_dom_node(stack);

		memset(&sel, 0, sizeof(sel));
	}

	if (select->selector)
		return DOM_ERR_NONE;

	return DOM_ERR_INVALID_STATE;
}
/* Basically this is just a wrapper for parse_dom_select() to ease error
* handling. */
struct dom_select *
init_dom_select(enum dom_select_syntax syntax, struct dom_string *string)
{
struct dom_select *select = mem_calloc(1, sizeof(select));
struct dom_stack stack;
enum dom_exception_code code;
init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
add_dom_stack_tracer(&stack, "init-select: ");
code = parse_dom_select(select, &stack, string);
done_dom_stack(&stack);
if (code == DOM_ERR_NONE)
return select;
done_dom_select(select);
return NULL;
}
/* Release @select together with its selector nodes, if it has any. */
void
done_dom_select(struct dom_select *select)
{
	struct dom_node *root = (struct dom_node *) select->selector;

	/* This will recursively free all children select nodes. */
	if (root)
		done_dom_node(root);

	mem_free(select);
}
/* DOM node selection: */
/* This struct stores data related to the 'application' of a DOM selector
* on a DOM tree or stream. */
struct dom_select_data {
/* The selector matching stack. The various selector nodes are pushed
* onto this stack as they are matched (and later popped when they are
* no longer 'reachable', that is, when they have been popped from the DOM
* tree or stream). This way the selector can match each selector node
* multiple times and the selection is a simple matter of matching the
* current node against each state on this stack. */
struct dom_stack stack;
/* Reference to the selector. */
struct dom_select *select;
/* The list of nodes which have been matched / selected. */
struct dom_node_list *list;
};
/* This state struct is used for the select data stack and holds info about the
* node that was matched. Its size is given as the object size of the select
* data stack context info, and it is read back through
* get_dom_select_state(). */
struct dom_select_state {
/* The matched node. This is always an element node. */
struct dom_node *node;
};
/* Find the child of @selector that has the given node @type. By design, a
 * selector node can only have one child per type of node, so the first hit
 * is returned. Returns NULL if there are no children or none of that type. */
static struct dom_select_node *
get_child_dom_select_node(struct dom_select_node *selector,
			  enum dom_node_type type)
{
	struct dom_node_list *kids = selector->node.data.element.children;
	struct dom_node *child;
	int pos;

	if (kids) {
		foreach_dom_node (kids, child, pos) {
			if (child->type == type)
				return (struct dom_select_node *) child;
		}
	}

	return NULL;
}
#define has_attribute_match(selector, name) \
	((selector)->match.attribute & (name))

/* Match the value of the attribute @node against the value of the attribute
 * @selector, using the matching method set in the selector's attribute match
 * flags. All comparisons go through the dom_string case-comparing functions.
 *
 * Returns non-zero if the value matches and zero if not. If no known
 * matching method is set, INTERNAL() is called and zero is returned.
 *
 * NOTE(review): a selector that only has DOM_SELECT_ATTRIBUTE_ID set ends up
 * in the "no matching method" branch here -- confirm whether ID selectors
 * are expected to carry one of the method flags as well. */
static int
match_attribute_value(struct dom_select_node *selector, struct dom_node *node)
{
	struct dom_string str;
	struct dom_string *selvalue = &selector->node.data.attribute.value;
	struct dom_string *value = &node->data.attribute.value;
	unsigned char separator;
	int do_compare;

	assert(selvalue->length);

	/* The attribute selector value should at least be contained in the
	 * attribute value. */
	if (value->length < selvalue->length)
		return 0;

	/* The following three matching methods require the selector value to
	 * match a substring at a well-defined offset. */

	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_EXACT)) {
		return !dom_string_casecmp(value, selvalue);
	}

	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_BEGIN)) {
		set_dom_string(&str, value->string, selvalue->length);

		return !dom_string_casecmp(&str, selvalue);
	}

	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_END)) {
		size_t offset = value->length - selvalue->length;

		set_dom_string(&str, value->string + offset, selvalue->length);

		return !dom_string_casecmp(&str, selvalue);
	}

	/* The 3 following matching methods require the selector value to be a
	 * substring of the value enclosed in a specific separator (with the
	 * beginning and ending of the attribute value both being valid
	 * separators). */

	set_dom_string(&str, value->string, value->length);

	/* This has to be one if-else chain. With the last test as a separate
	 * 'if', the hyphen list and contains methods always ended up in the
	 * final else branch. */
	if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_HYPHEN_LIST)) {
		separator = '-';

	} else if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_CONTAINS)) {
		separator = '\0';

	} else if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_SPACE_LIST)) {
		separator = ' ';

	} else {
		INTERNAL("No attribute selector matching method defined");
		return 0;
	}

	/* Slide over the value. @str always holds what is left of it and
	 * @do_compare tells if the current position follows a separator. */
	do_compare = 1;

	do {
		if (do_compare
		    && !dom_string_ncasecmp(&str, selvalue, selvalue->length)) {
			/* The match runs up to the end of the value, which
			 * is a valid separator for all methods. */
			if (str.length == selvalue->length)
				return 1;

			/* Check what follows the matched part. @str is longer
			 * than the selector value here, so the index is
			 * inside the value. (Indexing with str.length would
			 * look at the end of the value instead.) */
			switch (separator) {
			case '\0':
				/* "Contains" matches no matter what comes after. */
				return 1;

			case '-':
				if (str.string[selvalue->length] == separator)
					return 1;
				break;

			default:
				if (isspace(str.string[selvalue->length]))
					return 1;
			}
		}

		/* Is the next position the start of a new item? */
		switch (separator) {
		case '\0':
			do_compare = 1;
			break;

		case '-':
			do_compare = (str.string[0] == '-');
			break;

		default:
			do_compare = isspace(str.string[0]);
		}

		str.length--, str.string++;

	} while (str.length >= selvalue->length);

	return 0;
}
/* Match the attributes of an element @node against the attribute selector
 * nodes of a given @base. Every attribute selector must be satisfied.
 * Returns 1 on a match and 0 otherwise. */
static int
match_attribute_selectors(struct dom_select_node *base, struct dom_node *node)
{
	struct dom_node_list *wanted = base->node.data.element.map;
	struct dom_node_list *attrs = node->data.element.map;
	struct dom_node *selnode;
	size_t index;

	assert(base->node.type == DOM_NODE_ELEMENT
	       && node->type == DOM_NODE_ELEMENT);

	/* No attribute selectors at all is a clean match ... */
	if (!wanted)
		return 1;

	/* ... whereas selectors without any attributes to match them
	 * against can never succeed. */
	if (!attrs)
		return 0;

	foreach_dom_node (wanted, selnode, index) {
		struct dom_select_node *selector = (void *) selnode;
		struct dom_node *attr;

		if (!has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ID)) {
			/* Look the attribute up by type and name. */
			attr = get_dom_node_map_entry(attrs, DOM_NODE_ATTRIBUTE,
						      selnode->data.attribute.type,
						      &selnode->string);

		} else {
			size_t idindex;

			/* Use the first attribute that is flagged as ID. */
			attr = NULL;
			foreach_dom_node (attrs, attr, idindex) {
				if (attr->data.attribute.id)
					break;
			}

			/* Running off the list means no ID attribute. */
			if (!is_dom_node_list_member(attrs, idindex))
				attr = NULL;
		}

		if (!attr)
			return 0;

		/* The "any" method is satisfied by the attribute being
		 * there; the other methods also look at its value. */
		if (!has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ANY)
		    && !match_attribute_value(selector, attr))
			return 0;
	}

	return 1;
}
/* XXX: Assume the first context is the one! */
#define get_dom_select_state(stack, state) \
	((struct dom_select_state *) get_dom_stack_state_data((stack)->contexts[0], state))

/* Check that @node has the relation (direct child, direct adjacent or
 * indirect adjacent) required by @selector to a node recorded in one of
 * the states on @stack that hold the parent of @selector. Must only be
 * called for selectors that have a relation flag set. Returns 1 if such a
 * node is found and 0 otherwise. */
static int
match_element_relation(struct dom_select_node *selector, struct dom_node *node,
		       struct dom_stack *stack)
{
	enum dom_select_element_match relation = get_element_relation(selector);
	int want_child = (relation == DOM_SELECT_RELATION_DIRECT_CHILD);
	struct dom_stack_state *state;
	int node_index = -1;
	int pos;

	assert(relation);

	/* Every relation needs a parent: either to be a child of, or to
	 * look for siblings in. */
	if (!node->parent)
		return 0;

	if (!want_child) {
		/* A node with preceding siblings cannot be the first one
		 * in the list nor be missing from it (-1). */
		node_index = get_dom_node_list_index(node->parent, node);
		if (node_index < 1)
			return 0;
	}

	/* Go through the states, newest first, and check the node recorded
	 * in each state that holds the parent of the current selector. */
	foreachback_dom_stack_state(stack, state, pos) {
		struct dom_node *matched;
		int sibling_index;

		if (state->node != selector->node.parent)
			continue;

		matched = get_dom_select_state(stack, state)->node;

		if (want_child) {
			/* The recorded node has to be the parent of the
			 * current node. */
			if (matched == node->parent)
				return 1;

			continue;
		}

		/* Siblings share their parent. */
		if (matched->parent != node->parent)
			continue;

		sibling_index = get_dom_node_list_index(node->parent, matched);

		/* Direct adjacent: the sibling comes right before the
		 * current node. Indirect adjacent: it comes anywhere
		 * before it. */
		if (relation == DOM_SELECT_RELATION_DIRECT_ADJACENT
		    ? sibling_index + 1 == node_index
		    : sibling_index < node_index)
			return 1;
	}

	return 0;
}
#define has_element_match(selector, name) \
	((selector)->match.element & (name))

/* Check the element @node against all the requirements of the element
 * @selector: name, relation, structural flags and attribute selectors.
 * @stack is the selector matching stack used for resolving relations.
 * Returns 1 if the node matches and 0 otherwise. */
static int
match_element_selector(struct dom_select_node *selector, struct dom_node *node,
		       struct dom_stack *stack)
{
	assert(node && node->type == DOM_NODE_ELEMENT);

	/* The name has to compare equal unless the selector is universal. */
	if (!has_element_match(selector, DOM_SELECT_ELEMENT_UNIVERSAL)
	    && dom_node_casecmp(&selector->node, node))
		return 0;

	/* Anything but the plain descendant relation has to be verified. */
	if (get_element_relation(selector) != DOM_SELECT_RELATION_DESCENDANT
	    && !match_element_relation(selector, node, stack))
		return 0;

	/* Root nodes either have no parents or are the single child of the
	 * document node. */
	if (has_element_match(selector, DOM_SELECT_ELEMENT_ROOT)
	    && node->parent
	    && (node->parent->type != DOM_NODE_DOCUMENT
		|| node->parent->data.document.children->size > 1))
		return 0;

	/* Empty nodes have no children. */
	if (has_element_match(selector, DOM_SELECT_ELEMENT_EMPTY)
	    && node->data.element.children
	    && node->data.element.children->size > 0)
		return 0;

	/* FIXME: nth-child and nth-of-type matching is not implemented, so
	 * selectors that use them never match. */
	if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_CHILD)
	    || has_element_match(selector, DOM_SELECT_ELEMENT_NTH_TYPE))
		return 0;

	/* With no attribute selectors there is nothing left to check. */
	if (!selector->node.data.element.map)
		return 1;

	return match_attribute_selectors(selector, node) != 0;
}
#define get_dom_select_data(stack) ((stack)->current->data)

/* Push callback for element nodes: matches the element @node being visited
 * against the selector node of every state on the selector matching stack.
 * For each selector node that matches, its element child selector (if it
 * has one) is pushed onto the matching stack as the next one to match.
 *
 * NOTE(review): nothing in this function records @node in the
 * dom_select_state of the pushed state, nor adds it to the list of
 * selected nodes -- confirm whether that is done elsewhere. */
static void
dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = get_dom_select_data(stack);
	struct dom_stack *matches = &select_data->stack;
	struct dom_stack_state *state;
	int pos;

	foreach_dom_stack_state(matches, state, pos) {
		struct dom_select_node *current = (void *) state->node;
		struct dom_select_node *next;

		/* FIXME: Since the same dom_select_node can be multiple times
		 * on the select_data->stack, cache what select nodes was
		 * matches so that it is only checked once. */

		if (match_element_selector(current, node, matches)) {
			WDBG("Matched element: %.*s.", node->string.length, node->string.string);

			/* The node matched, so the next selector node to
			 * match goes on the stack. */
			next = get_child_dom_select_node(current, DOM_NODE_ELEMENT);
			if (next)
				push_dom_node(matches, &next->node);
		}
	}
}
/* Pop callback for element nodes: makes sure that a @node which is no
 * longer 'reachable' on the stack has no states associated with it on the
 * select data stack. Every state whose recorded node is @node is popped. */
static void
dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = get_dom_select_data(stack);
	struct dom_stack *matches = &select_data->stack;
	struct dom_stack_state *state;
	int pos;

	/* Walk the states newest first, since states are popped on the way. */
	foreachback_dom_stack_state (matches, state, pos) {
		struct dom_select_state *matched = get_dom_select_state(matches, state);

		if (matched->node == node) {
			pop_dom_state(matches, state);
			WDBG("Remove element.");
		}
	}
}
/* Push callback shared by the node types that can contain text (text, CDATA
 * section and entity reference nodes). For now it is only meant for
 * matching the ':contains(<string>)' pseudo-class: it looks up the text
 * child selector of the selector on top of the matching stack, but does not
 * do any matching yet. */
static void
dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_select_data *select_data = get_dom_select_data(stack);
	struct dom_stack_state *top = get_dom_stack_top(&select_data->stack);
	struct dom_select_node *selector = (void *) top->node;
	struct dom_select_node *text_sel;
	struct dom_string *text;

	text_sel = get_child_dom_select_node(selector, DOM_NODE_TEXT);

	WDBG("Text node: %d chars", node->string.length);

	/* Without a text selector there is nothing to match against. */
	if (!text_sel)
		return;

	text = &text_sel->node.string;

	/* Only these node types are expected to end up here. */
	if (node->type != DOM_NODE_TEXT
	    && node->type != DOM_NODE_CDATA_SECTION
	    && node->type != DOM_NODE_ENTITY_REFERENCE) {
		ERROR("Unhandled type");
	}
}
/* Context info for interacting with the DOM tree or stream stack.
*
* select_dom_nodes() adds this context to the stack used for walking the
* nodes. No per-state object is requested (object size 0). Going by the slot
* labels below, each callback table has one entry per DOM node type with an
* unused leading slot (presumably node type value 0 -- confirm against
* enum dom_node_type). Element nodes get both a push and a pop callback; the
* node types that can contain text share one push callback. */
static struct dom_stack_context_info dom_select_context_info = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_select_push_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_select_push_text,
/* DOM_NODE_CDATA_SECTION */ dom_select_push_text,
/* DOM_NODE_ENTITY_REFERENCE */ dom_select_push_text,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_select_pop_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
/* Context info related to the private select data stack of matched nodes.
*
* select_dom_nodes() adds this context to the selector matching stack. It
* installs no push or pop callbacks at all; it only requests a
* struct dom_select_state sized object for each state, which is what
* get_dom_select_state() hands out. */
static struct dom_stack_context_info dom_select_data_context_info = {
/* Object size: */ sizeof(struct dom_select_state),
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
struct dom_node_list *
select_dom_nodes(struct dom_select *select, struct dom_node *root)
{
struct dom_select_data select_data;
struct dom_stack stack;
memset(&select_data, 0, sizeof(select_data));
select_data.select = select;;
init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
add_dom_stack_context(&stack, &select_data,
&dom_select_context_info);
add_dom_stack_tracer(&stack, "select-tree: ");
init_dom_stack(&select_data.stack, DOM_STACK_FLAG_NONE);
add_dom_stack_context(&select_data.stack, &select_data,
&dom_select_data_context_info);
add_dom_stack_tracer(&select_data.stack, "select-match: ");
if (push_dom_node(&select_data.stack, &select->selector->node)) {
get_dom_stack_top(&select_data.stack)->immutable = 1;
walk_dom_nodes(&stack, root);
}
done_dom_stack(&select_data.stack);
done_dom_stack(&stack);
return select_data.list;
}