mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
487 lines
16 KiB
C
487 lines
16 KiB
C
/** DOM node module
|
|
*
|
|
* @file dom/node.h
|
|
*
|
|
* This module defines the various node and node list data structures
|
|
* and functionality to modify and access them, such as adding a node as
|
|
* a child to a given node and getting the text string of a node as
|
|
* defined by the DOM specification.
|
|
*
|
|
* @par Node hierarchy
|
|
*
|
|
* DOM documents are represented as a collection of nodes arranged in a
|
|
* hierarchic structure. At the root is either a #DOM_NODE_DOCUMENT or
|
|
* #DOM_NODE_DOCUMENT_FRAGMENT node, each of which may have multiple
|
|
* child nodes. There is a well-defined order that dictates which child
|
|
* nodes may be descendants of a given type of node. For example, text
|
|
* and attribute nodes can have no children, while elements node may
|
|
* have both attribute and element nodes as children but with each type
|
|
* in different node lists. The hierarchy is somewhat encoded in the
|
|
* type specific node data, however, certain node types also define
|
|
* "custom" node lists for conveniently storing additional "embedded"
|
|
* data, such as processing instruction nodes having an attribute node
|
|
* list for conveniently accessing variable-value pairs given for
|
|
* XML-specific processing instructions:
|
|
*
|
|
* @verbatim <?xml version="1.0"?> @endverbatim
|
|
*
|
|
* @par Node lists
|
|
*
|
|
* There are two types of list: unordered (the default) and
|
|
* alphabetically ordered (also called "maps"). Both types of list
|
|
* stores all contained nodes in the index-oriented #dom_node_list data
|
|
* structure.
|
|
*
|
|
* When inserting a node into a list, first use either
|
|
* #get_dom_node_list_index or #get_dom_node_map_index (depending on
|
|
* whether the list is unordered or ordered respectively) to calculate
|
|
* the index at which to insert the new node. Then use
|
|
* #add_to_dom_node_list to insert the node in the list at the given
|
|
* position. Alternatively (and mostly preferred), simply use
|
|
* #add_dom_node to have all of the above done automatically plus some
|
|
* additional checks.
|
|
*
|
|
* A variety of node list accessors are defined. The node structure does
|
|
* not define any "next" or "previous" members to get siblings due to
|
|
* reduce memory usage (this might have to change --jonas). Instead, use
|
|
* #get_dom_node_next and #get_dom_node_next to access siblings. To
|
|
* lookup the existence of a node in a sorted node list (map) use
|
|
* #get_dom_node_map_entry. If a specific and unique node subtype should
|
|
* be found use #get_dom_node_child that given a parent node will find a
|
|
* child node based on a specific child node type and subtype. Finally,
|
|
* list can be iterated in forward and reverse order using
|
|
* #foreach_dom_node and #foreachback_dom_node.
|
|
*/
|
|
|
|
#ifndef EL_DOM_NODE_H
|
|
#define EL_DOM_NODE_H
|
|
|
|
#include "dom/string.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
struct dom_node_list;
|
|
struct dom_document;
|
|
|
|
/** DOM node types */
|
|
enum dom_node_type {
|
|
DOM_NODE_UNKNOWN = 0, /**< Node type used internally. */
|
|
|
|
DOM_NODE_ELEMENT = 1, /**< Element node */
|
|
DOM_NODE_ATTRIBUTE = 2, /**< Attribute node */
|
|
DOM_NODE_TEXT = 3, /**< Text node */
|
|
DOM_NODE_CDATA_SECTION = 4, /**< CData section node */
|
|
DOM_NODE_ENTITY_REFERENCE = 5, /**< Entity reference node */
|
|
DOM_NODE_ENTITY = 6, /**< Entity node */
|
|
DOM_NODE_PROCESSING_INSTRUCTION = 7, /**< Processing instruction node */
|
|
DOM_NODE_COMMENT = 8, /**< Comment node */
|
|
DOM_NODE_DOCUMENT = 9, /**< Document root node */
|
|
DOM_NODE_DOCUMENT_TYPE = 10, /**< Document type (DTD) node */
|
|
DOM_NODE_DOCUMENT_FRAGMENT = 11, /**< Document fragment node */
|
|
DOM_NODE_NOTATION = 12, /**< Notation node */
|
|
|
|
DOM_NODES /**< The number of DOM nodes */
|
|
};
|
|
|
|
/* Following is the node specific data structures. They may contain no
|
|
* more than 4 pointers or something equivalent. */
|
|
|
|
/* The document URI is stored in the string / length members. */
|
|
struct dom_document_node {
|
|
/* The document. */
|
|
struct dom_document *document;
|
|
|
|
/* The child nodes. May be NULL. Ordered like they where inserted. */
|
|
/* FIXME: Should be just one element (root) node reference. */
|
|
struct dom_node_list *children;
|
|
};
|
|
|
|
struct dom_id {
|
|
struct dom_string public_id;
|
|
struct dom_string system_id;
|
|
};
|
|
|
|
struct dom_doctype_subset_info {
|
|
struct dom_string internal;
|
|
struct dom_id external;
|
|
};
|
|
|
|
struct dom_document_type_node {
|
|
/* These are really maps and should be sorted alphabetically. */
|
|
struct dom_node_list *entities;
|
|
struct dom_node_list *notations;
|
|
|
|
/* The string/length members of dom_node hold the name of the document
|
|
* type "<!DOCTYPE {name} ...>". This holds the ids for the external
|
|
* subset and the string of the internal subset. */
|
|
struct dom_doctype_subset_infot *subset;
|
|
};
|
|
|
|
/* Element nodes are indexed nodes stored in node lists of either
|
|
* other child nodes or the root node. */
|
|
struct dom_element_node {
|
|
/* The child nodes. May be NULL. Ordered like they where inserted. */
|
|
struct dom_node_list *children;
|
|
|
|
/* Only element nodes can have attributes and element nodes can only be
|
|
* child nodes so the map is put here.
|
|
*
|
|
* The @map may be NULL if there are none. The @map nodes are sorted
|
|
* alphabetically according to the attributes name so it has fast
|
|
* lookup. */
|
|
struct dom_node_list *map;
|
|
|
|
/* For <xsl:stylesheet ...> elements this holds the offset of
|
|
* 'stylesheet' */
|
|
uint16_t namespace_offset;
|
|
|
|
/* Special implementation dependent type specifier for example
|
|
* containing an enum value representing the element to reduce string
|
|
* comparing and only do one fast find mapping. */
|
|
uint16_t type;
|
|
};
|
|
|
|
/* Attribute nodes are named nodes stored in a node map of an element node. */
|
|
struct dom_attribute_node {
|
|
/* The string that hold the attribute value. The @string / @length
|
|
* members of {struct dom_node} holds the name that identifies the node
|
|
* in the map. */
|
|
struct dom_string value;
|
|
|
|
/* For xml:lang="en" attributes this holds the offset of 'lang' */
|
|
uint16_t namespace_offset;
|
|
|
|
/* Special implementation dependent type specifier. For HTML it (will)
|
|
* contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
|
|
* to reduce string comparing and only do one fast find mapping. */
|
|
uint16_t type;
|
|
|
|
/* The attribute value is delimited by quotes. Can be NUL, ' or ". */
|
|
unsigned char quoted;
|
|
|
|
/* Was the attribute specified in the DTD as a default attribute or was
|
|
* it added from the document source. */
|
|
unsigned int specified:1;
|
|
|
|
/* Has the node->string been converted to internal charset. */
|
|
unsigned int converted:1;
|
|
|
|
/* Is the attribute a unique identifier. */
|
|
unsigned int id:1;
|
|
|
|
/* The attribute value references some other resource */
|
|
unsigned int reference:1;
|
|
};
|
|
|
|
struct dom_text_node {
|
|
/* The number of newlines the text string contains */
|
|
unsigned int newlines;
|
|
|
|
/* We will need to add text nodes even if they contain only whitespace.
|
|
* In order to quickly identify such nodes this member is used. */
|
|
unsigned int only_space:1;
|
|
|
|
/* Has the node->string been converted to internal charset. */
|
|
unsigned int converted:1;
|
|
};
|
|
|
|
enum dom_proc_instruction_type {
|
|
DOM_PROC_INSTRUCTION,
|
|
|
|
/* Keep this group sorted */
|
|
DOM_PROC_INSTRUCTION_XML, /* XML header */
|
|
DOM_PROC_INSTRUCTION_XML_STYLESHEET, /* XML stylesheet link */
|
|
|
|
DOM_PROC_INSTRUCTION_TYPES
|
|
};
|
|
|
|
struct dom_proc_instruction_node {
|
|
/* The target of the processing instruction (xml for '<?xml ... ?>')
|
|
* is in the @string / @length members. */
|
|
/* This holds the value to be processed */
|
|
struct dom_string instruction;
|
|
|
|
/* For fast checking of the target type */
|
|
uint16_t type; /* enum dom_proc_instruction_type */
|
|
|
|
/* For some processing instructions like xml the instructions contain
|
|
* attributes and those attribute can be collected in this @map. */
|
|
struct dom_node_list *map;
|
|
};
|
|
|
|
union dom_node_data {
|
|
struct dom_document_node document;
|
|
struct dom_document_type_node document_type;
|
|
struct dom_element_node element;
|
|
struct dom_attribute_node attribute;
|
|
struct dom_text_node text;
|
|
struct dom_id notation;
|
|
/* For entities string/length hold the notation name */
|
|
struct dom_id entity;
|
|
struct dom_proc_instruction_node proc_instruction;
|
|
|
|
/* Node types without a union member yet (mostly because it hasn't
|
|
* been necessary):
|
|
*
|
|
* DOM_NODE_CDATA_SECTION: Use dom_text_node?
|
|
* DOM_NODE_DOCUMENT_FRAGMENT: struct dom_node_list children;
|
|
* DOM_NODE_ENTITY_REFERENCE: unicode_val_T
|
|
* DOM_NODE_COMMENT
|
|
*/
|
|
};
|
|
|
|
/** DOM node
|
|
*
|
|
* The node data structure is an abstract container that can be used to
|
|
* represent the hierarchic structure of a document, such as relation
|
|
* between elements, attributes, etc.
|
|
*
|
|
* @note This structure is size critical so keep ordering to make
|
|
* it easier to pack and avoid unneeded members.
|
|
*/
|
|
struct dom_node {
|
|
/** The type of the node. Holds a #dom_node_type enum value. */
|
|
uint16_t type; /* -> enum dom_node_type */
|
|
|
|
/** Was the node string allocated? */
|
|
unsigned int allocated:1;
|
|
|
|
/** Type specific node string. Can contain either stuff like
|
|
* element name or for attributes the attribute name. */
|
|
struct dom_string string;
|
|
|
|
/** The parent node. The parent node is NULL for the root node. */
|
|
struct dom_node *parent;
|
|
|
|
/** Type specific node data. */
|
|
union dom_node_data data;
|
|
};
|
|
|
|
/** DOM node list
|
|
*
|
|
* A node list can be used for storing indexed nodes. If a node list
|
|
* should be sorted alphabetically use the #get_dom_node_map_index
|
|
* function to find the index of new nodes before inserting them. */
|
|
struct dom_node_list {
|
|
size_t size;
|
|
struct dom_node *entries[1];
|
|
};
|
|
|
|
#define foreach_dom_node(list, node, i) \
|
|
for ((i) = 0; (i) < (list)->size; (i)++) \
|
|
if (((node) = (list)->entries[(i)]))
|
|
|
|
#define foreachback_dom_node(list, node, i) \
|
|
for ((i) = (list)->size - 1; (i) > 0; (i)--) \
|
|
if (((node) = (list)->entries[(i)]))
|
|
|
|
#define is_dom_node_list_member(list, member) \
|
|
((list) && 0 <= (member) && (member) < (list)->size)
|
|
|
|
/* Adds @node to the list pointed to by @list_ptr at the given @position. If
|
|
* @position is -1 the node is added at the end. */
|
|
struct dom_node_list *
|
|
add_to_dom_node_list(struct dom_node_list **list_ptr,
|
|
struct dom_node *node, int position);
|
|
|
|
void done_dom_node_list(struct dom_node_list *list);
|
|
|
|
/* Returns the position or index where the @node has been inserted into the
|
|
* 'default' list of the @parent node. (Default means use get_dom_node_list()
|
|
* to acquire the list to search in. Returns -1, if the node is not found. */
|
|
int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
|
|
|
|
/* Returns the position or index where the @node should be inserted into the
|
|
* node @list in order to the list to be alphabetically sorted. Assumes that
|
|
* @list is already sorted properly. */
|
|
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
|
|
|
|
/* Returns the previous sibling to the node. */
|
|
struct dom_node *get_dom_node_prev(struct dom_node *node);
|
|
|
|
/* Returns the next sibling to the node. */
|
|
struct dom_node *get_dom_node_next(struct dom_node *node);
|
|
|
|
/* Returns first text node of the element or NULL. */
|
|
struct dom_node *
|
|
get_dom_node_child(struct dom_node *node, /*enum dom_node_type*/ uint16_t child_type,
|
|
int16_t child_subtype);
|
|
|
|
/* Looks up the @node_map for a node matching the requested type and name.
|
|
* The @subtype maybe be 0 indication unknown subtype and only name should be
|
|
* tested else it will indicate either the element or attribute private
|
|
* subtype. */
|
|
struct dom_node *
|
|
get_dom_node_map_entry(struct dom_node_list *node_map,
|
|
/*enum dom_node_type*/ uint16_t type, uint16_t subtype,
|
|
struct dom_string *name);
|
|
|
|
/* Removes the node and all its children and free()s itself.
|
|
* A dom_stack_callback_T must not use this to free the node
|
|
* it gets as a parameter. */
|
|
void done_dom_node(struct dom_node *node);
|
|
|
|
#ifndef DEBUG_MEMLEAK
|
|
|
|
/* The allocated argument is used as the value of node->allocated if >= 0.
|
|
* Use -1 to default node->allocated to the value of parent->allocated. */
|
|
|
|
struct dom_node *
|
|
init_dom_node_at(struct dom_node *parent, /*enum dom_node_type*/ uint16_t type,
|
|
struct dom_string *string, int allocated);
|
|
|
|
#define init_dom_node(type, string, allocated) \
|
|
init_dom_node_at(NULL, type, string, allocated)
|
|
|
|
#define add_dom_node(parent, type, string) \
|
|
init_dom_node_at(parent, type, string, -1)
|
|
|
|
#else
|
|
struct dom_node *
|
|
init_dom_node_at(char *file, int line,
|
|
struct dom_node *parent, /*enum dom_node_type*/ uint16_t type,
|
|
struct dom_string *string, int allocated);
|
|
|
|
#define init_dom_node(type, string, allocated) \
|
|
init_dom_node_at(__FILE__, __LINE__, NULL, type, string, allocated)
|
|
|
|
#define add_dom_node(parent, type, string) \
|
|
init_dom_node_at(__FILE__, __LINE__, parent, type, string, -1)
|
|
|
|
#endif /* DEBUG_MEMLEAK */
|
|
|
|
#define add_dom_element(parent, string) \
|
|
add_dom_node(parent, DOM_NODE_ELEMENT, string)
|
|
|
|
static inline struct dom_node *
|
|
add_dom_attribute(struct dom_node *parent, struct dom_string *name,
|
|
struct dom_string *value)
|
|
{
|
|
struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
|
|
|
|
if (node && value) {
|
|
struct dom_string *str = &node->data.attribute.value;
|
|
|
|
if (node->allocated) {
|
|
if (!init_dom_string(str, value->string, value->length)) {
|
|
done_dom_node(node);
|
|
return NULL;
|
|
}
|
|
} else {
|
|
copy_dom_string(str, value);
|
|
}
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
static inline struct dom_node *
|
|
add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
|
|
struct dom_string *instruction)
|
|
{
|
|
struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
|
|
|
|
if (node && instruction) {
|
|
struct dom_string *str = &node->data.proc_instruction.instruction;
|
|
|
|
if (node->allocated) {
|
|
if (!init_dom_string(str, instruction->string, instruction->length)) {
|
|
done_dom_node(node);
|
|
return NULL;
|
|
}
|
|
} else {
|
|
copy_dom_string(str, instruction);
|
|
}
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
/* Compare two nodes returning non-zero if they differ. */
|
|
int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
|
|
|
|
/* Returns the name of the node in an allocated string. */
|
|
struct dom_string *get_dom_node_name(struct dom_node *node);
|
|
|
|
/* Returns the value of the node or NULL if no value is defined for the node
|
|
* type. */
|
|
struct dom_string *get_dom_node_value(struct dom_node *node);
|
|
|
|
/* Returns the name used for identifying the node type. */
|
|
struct dom_string *get_dom_node_type_name(/*enum dom_node_type*/ uint16_t type);
|
|
|
|
/** Based on the type of the @a parent and the node @a type return a
|
|
* proper list or NULL. This is useful when adding a node to a parent
|
|
* node.
|
|
*
|
|
* With a <code>struct dom_node_list **list</code> returned by this
|
|
* function, there are four possibilities:
|
|
*
|
|
* - <code>list == NULL</code>. This means @a parent does not support
|
|
* child nodes of the given @a type.
|
|
*
|
|
* - <code>*list == NULL</code>. This means @a parent does not yet
|
|
* have any child nodes of the given @a type and so no list has been
|
|
* allocated for them. Callers should treat the lack of a list in
|
|
* the same way as an empty list.
|
|
*
|
|
* - <code>(*list)->size == 0</code>. This is an empty list. It is
|
|
* unspecified whether the DOM code keeps such lists; it could
|
|
* instead change them back to NULL.
|
|
*
|
|
* - <code>(*list)->size != 0</code>. This is a nonempty list.
|
|
* However, the nodes in it might not actually be of the given
|
|
* @a type because some lists are used for multiple types. */
|
|
static inline struct dom_node_list **
|
|
get_dom_node_list_by_type(struct dom_node *parent, /*enum dom_node_type*/ uint16_t type)
|
|
{
|
|
switch (parent->type) {
|
|
case DOM_NODE_DOCUMENT:
|
|
return &parent->data.document.children;
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
switch (type) {
|
|
case DOM_NODE_ATTRIBUTE:
|
|
return &parent->data.element.map;
|
|
|
|
default:
|
|
return &parent->data.element.children;
|
|
}
|
|
|
|
case DOM_NODE_DOCUMENT_TYPE:
|
|
switch (type) {
|
|
case DOM_NODE_ENTITY:
|
|
return &parent->data.document_type.entities;
|
|
|
|
case DOM_NODE_NOTATION:
|
|
return &parent->data.document_type.notations;
|
|
|
|
default:
|
|
return NULL;
|
|
}
|
|
|
|
case DOM_NODE_PROCESSING_INSTRUCTION:
|
|
switch (type) {
|
|
case DOM_NODE_ATTRIBUTE:
|
|
return &parent->data.proc_instruction.map;
|
|
|
|
default:
|
|
return NULL;
|
|
}
|
|
|
|
default:
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
#define get_dom_node_list(parent, node) \
|
|
get_dom_node_list_by_type(parent, (node)->type)
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|