2005-09-15 09:58:31 -04:00
|
|
|
/* The DOM node handling */
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "elinks.h"
|
|
|
|
|
|
|
|
#include "document/dom/node.h"
|
|
|
|
#include "document/options.h"
|
|
|
|
#include "intl/charsets.h"
|
|
|
|
#include "util/hash.h"
|
|
|
|
#include "util/lists.h"
|
|
|
|
#include "util/memory.h"
|
|
|
|
#include "util/string.h"
|
|
|
|
|
|
|
|
|
2005-12-07 20:59:40 -05:00
|
|
|
/* Forward declaration; the definition is further down in this file. It
 * releases a node's type specific data and the node itself without unlinking
 * the node from its parent, which done_dom_node_list() relies on. */
static void done_dom_node_data(struct dom_node *node);
|
|
|
|
|
2005-09-15 09:58:31 -04:00
|
|
|
/* Node lists */
|
|
|
|
|
|
|
|
/* Granularity argument handed to ALIGN_MEMORY_SIZE() whenever the number of
 * entries in a node list is rounded for allocation purposes. */
#define DOM_NODE_LIST_GRANULARITY 0x7

/* Byte size of one growth block of node pointers: the aligned size of a single
 * entry times the size of a node pointer. realloc_dom_node_list() clears this
 * many bytes after growing a list. */
#define DOM_NODE_LIST_BLOCK_SIZE \
	(ALIGN_MEMORY_SIZE(1, DOM_NODE_LIST_GRANULARITY) * sizeof(struct dom_node *))

/* Byte size of a node list holding @size entries. The node list struct has
 * one node pointer already, hence the "- 1". The argument is parenthesized so
 * that expressions with low precedence operators expand correctly. */
#define DOM_NODE_LIST_SIZE(size) \
	(((size) - 1) * sizeof(struct dom_node *) + sizeof(struct dom_node_list))
|
|
|
|
|
|
|
|
static inline struct dom_node_list *
|
|
|
|
realloc_dom_node_list(struct dom_node_list **oldlist)
|
|
|
|
{
|
|
|
|
struct dom_node_list *list = *oldlist;
|
|
|
|
size_t size = list ? list->size : 0;
|
|
|
|
size_t oldsize = ALIGN_MEMORY_SIZE(size, DOM_NODE_LIST_GRANULARITY);
|
|
|
|
size_t newsize = ALIGN_MEMORY_SIZE(size + 1, DOM_NODE_LIST_GRANULARITY);
|
|
|
|
|
|
|
|
if (newsize <= oldsize) return list;
|
|
|
|
|
|
|
|
list = mem_realloc(list, DOM_NODE_LIST_SIZE(newsize));
|
|
|
|
if (!list) return NULL;
|
|
|
|
|
|
|
|
/* If this is the first reallocation clear the size */
|
|
|
|
if (!size) list->size = 0;
|
|
|
|
|
|
|
|
/* Clear the new block of entries */
|
|
|
|
memset(&list->entries[oldsize], 0, DOM_NODE_LIST_BLOCK_SIZE);
|
|
|
|
|
|
|
|
*oldlist = list;
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct dom_node_list *
|
|
|
|
add_to_dom_node_list(struct dom_node_list **list_ptr,
|
|
|
|
struct dom_node *node, int position)
|
|
|
|
{
|
|
|
|
struct dom_node_list *list;
|
|
|
|
|
|
|
|
assert(list_ptr && node);
|
|
|
|
|
|
|
|
list = realloc_dom_node_list(list_ptr);
|
|
|
|
if (!list) return NULL;
|
|
|
|
|
|
|
|
assertm(position < 0 || position <= list->size,
|
|
|
|
"position out of bound %d > %zu", position, list->size);
|
|
|
|
|
|
|
|
if (position < 0) {
|
|
|
|
position = list->size;
|
|
|
|
|
|
|
|
} else if (position < list->size) {
|
|
|
|
/* Make room if we have to add the node in the middle of the list */
|
|
|
|
struct dom_node **offset = &list->entries[position];
|
|
|
|
size_t size = (list->size - position) * sizeof(*offset);
|
|
|
|
|
|
|
|
memmove(offset + 1, offset, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
list->size++;
|
|
|
|
list->entries[position] = node;
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2005-12-07 20:59:40 -05:00
|
|
|
static void
|
|
|
|
del_from_dom_node_list(struct dom_node_list *list, struct dom_node *node)
|
|
|
|
{
|
|
|
|
struct dom_node *entry;
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
if (!list) return;
|
|
|
|
|
|
|
|
foreach_dom_node(i, entry, list) {
|
|
|
|
size_t successors;
|
|
|
|
|
|
|
|
if (entry != node) continue;
|
|
|
|
|
|
|
|
successors = list->size - (i + 1);
|
|
|
|
if (successors)
|
|
|
|
memmove(&list->entries[i], &list->entries[i+1],
|
|
|
|
sizeof(*list->entries) * successors);
|
|
|
|
list->size--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-09-15 09:58:31 -04:00
|
|
|
void
|
|
|
|
done_dom_node_list(struct dom_node_list *list)
|
|
|
|
{
|
|
|
|
struct dom_node *node;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
assert(list);
|
|
|
|
|
|
|
|
foreach_dom_node (i, node, list) {
|
2005-12-07 20:59:40 -05:00
|
|
|
/* Avoid that the node start messing with the node list. */
|
|
|
|
done_dom_node_data(node);
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
mem_free(list);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Node map */
|
|
|
|
|
|
|
|
/* State of a binary search through a node list. */
struct dom_node_search {
	/* The node being looked for. */
	struct dom_node *key;
	/* When non-zero, element and attribute nodes are compared by this
	 * type id instead of by their string. */
	int subtype;
	/* Search window; entries at @from and @to themselves are excluded. */
	unsigned int from;
	/* Index of the entry that was compared most recently. */
	unsigned int pos;
	unsigned int to;
};

/* Initializer for a search over all of @list: the window starts just before
 * the first entry (-1, which wraps in the unsigned member) and ends just
 * after the last one. */
#define INIT_DOM_NODE_SEARCH(key, subtype, list)	\
	{						\
		(key),					\
		(subtype),				\
		-1,		/* from */		\
		0,		/* pos */		\
		(list)->size,	/* to */		\
	}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
dom_node_cmp(struct dom_node_search *search, struct dom_node *node)
|
|
|
|
{
|
|
|
|
struct dom_node *key = search->key;
|
|
|
|
|
|
|
|
if (search->subtype) {
|
|
|
|
assert(key->type == node->type);
|
|
|
|
|
|
|
|
switch (key->type) {
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
|
|
return search->subtype - node->data.element.type;
|
|
|
|
|
|
|
|
case DOM_NODE_ATTRIBUTE:
|
|
|
|
return search->subtype - node->data.attribute.type;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
{
|
2005-12-10 12:37:47 -05:00
|
|
|
int length = int_min(key->string.length, node->string.length);
|
|
|
|
int string_diff = strncasecmp(key->string.string, node->string.string, length);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
/* If the lengths or strings don't match strncasecmp() does the
|
|
|
|
* job else return which ever is bigger. */
|
|
|
|
|
2005-12-10 12:37:47 -05:00
|
|
|
return string_diff ? string_diff : key->string.length - node->string.length;
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
get_bsearch_position(struct dom_node_list *list, int from, int to)
|
|
|
|
{
|
|
|
|
int pos = from + ((to - from) / 2);
|
|
|
|
|
|
|
|
assertm(0 <= pos && pos < list->size, "pos %d", pos);
|
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* True while at least one index lies strictly between @from and @to. */
#define has_bsearch_node(from, to) ((to) > (from) + 1)
|
|
|
|
|
|
|
|
static inline struct dom_node *
|
|
|
|
dom_node_list_bsearch(struct dom_node_search *search, struct dom_node_list *list)
|
|
|
|
{
|
|
|
|
assert(has_bsearch_node(search->from, search->to));
|
|
|
|
|
|
|
|
do {
|
|
|
|
int pos = get_bsearch_position(list, search->from, search->to);
|
|
|
|
struct dom_node *node = list->entries[pos];
|
|
|
|
int difference = dom_node_cmp(search, node);
|
|
|
|
|
|
|
|
search->pos = pos;
|
|
|
|
|
|
|
|
if (!difference) return node;
|
|
|
|
|
|
|
|
if (difference < 0) {
|
|
|
|
search->to = search->pos;
|
|
|
|
} else {
|
|
|
|
search->from = search->pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
} while (has_bsearch_node(search->from, search->to));
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node)
|
|
|
|
{
|
|
|
|
struct dom_node_search search = INIT_DOM_NODE_SEARCH(node, 0, list);
|
|
|
|
struct dom_node *match = dom_node_list_bsearch(&search, list);
|
|
|
|
|
|
|
|
return match ? search.pos : search.to;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct dom_node *
|
|
|
|
get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type,
|
|
|
|
uint16_t subtype, unsigned char *name, int namelen)
|
|
|
|
{
|
2005-12-10 12:37:47 -05:00
|
|
|
struct dom_node node = { type, INIT_DOM_STRING(name, namelen) };
|
2005-09-15 09:58:31 -04:00
|
|
|
struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, subtype, list);
|
|
|
|
|
|
|
|
return dom_node_list_bsearch(&search, list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Nodes */
|
|
|
|
|
|
|
|
struct dom_node *
|
|
|
|
init_dom_node_(unsigned char *file, int line,
|
|
|
|
struct dom_node *parent, enum dom_node_type type,
|
|
|
|
unsigned char *string, uint16_t length)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG_MEMLEAK
|
|
|
|
struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node));
|
|
|
|
#else
|
|
|
|
struct dom_node *node = mem_calloc(1, sizeof(*node));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!node) return NULL;
|
|
|
|
|
|
|
|
node->type = type;
|
2005-12-07 19:35:48 -05:00
|
|
|
node->parent = parent;
|
2005-12-10 12:37:47 -05:00
|
|
|
set_dom_string(&node->string, string, length);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (parent) {
|
|
|
|
struct dom_node_list **list = get_dom_node_list(parent, node);
|
|
|
|
int sort = (type == DOM_NODE_ATTRIBUTE);
|
|
|
|
int index;
|
|
|
|
|
|
|
|
assertm(list, "Adding %s to bad parent %s",
|
|
|
|
get_dom_node_type_name(node->type),
|
|
|
|
get_dom_node_type_name(parent->type));
|
|
|
|
|
|
|
|
index = *list && (*list)->size > 0 && sort
|
|
|
|
? get_dom_node_map_index(*list, node) : -1;
|
|
|
|
|
|
|
|
if (!add_to_dom_node_list(list, node, index)) {
|
|
|
|
done_dom_node(node);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2005-12-07 20:59:40 -05:00
|
|
|
done_dom_node_data(struct dom_node *node)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
union dom_node_data *data;
|
|
|
|
|
|
|
|
assert(node);
|
|
|
|
|
|
|
|
data = &node->data;
|
|
|
|
|
|
|
|
switch (node->type) {
|
2005-12-07 20:32:23 -05:00
|
|
|
case DOM_NODE_ATTRIBUTE:
|
|
|
|
if (data->attribute.allocated)
|
2005-12-10 12:37:47 -05:00
|
|
|
done_dom_string(&node->string);
|
2005-12-07 20:32:23 -05:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_DOCUMENT:
|
|
|
|
if (data->document.element_ids)
|
|
|
|
free_hash(data->document.element_ids);
|
|
|
|
|
|
|
|
if (data->document.meta_nodes)
|
|
|
|
done_dom_node_list(data->document.meta_nodes);
|
|
|
|
|
|
|
|
if (data->document.children)
|
|
|
|
done_dom_node_list(data->document.children);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
|
|
if (data->element.children)
|
|
|
|
done_dom_node_list(data->element.children);
|
|
|
|
|
|
|
|
if (data->element.map)
|
|
|
|
done_dom_node_list(data->element.map);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_TEXT:
|
|
|
|
if (data->text.allocated)
|
2005-12-10 12:37:47 -05:00
|
|
|
done_dom_string(&node->string);
|
2005-12-07 20:32:23 -05:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_PROCESSING_INSTRUCTION:
|
|
|
|
if (data->proc_instruction.map)
|
|
|
|
done_dom_node_list(data->proc_instruction.map);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
mem_free(node);
|
|
|
|
}
|
|
|
|
|
2005-12-07 20:59:40 -05:00
|
|
|
void
|
|
|
|
done_dom_node(struct dom_node *node)
|
|
|
|
{
|
|
|
|
assert(node);
|
|
|
|
|
|
|
|
if (node->parent) {
|
|
|
|
struct dom_node *parent = node->parent;
|
|
|
|
union dom_node_data *data = &parent->data;
|
|
|
|
|
|
|
|
switch (parent->type) {
|
|
|
|
case DOM_NODE_DOCUMENT:
|
|
|
|
del_from_dom_node_list(data->document.meta_nodes, node);
|
|
|
|
del_from_dom_node_list(data->document.children, node);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
|
|
del_from_dom_node_list(data->element.children, node);
|
|
|
|
del_from_dom_node_list(data->element.map, node);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_PROCESSING_INSTRUCTION:
|
|
|
|
del_from_dom_node_list(data->proc_instruction.map, node);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
done_dom_node_data(node);
|
|
|
|
}
|
|
|
|
|
2005-09-15 09:58:31 -04:00
|
|
|
/* Point @name at @str and store the length of @str, not counting the
 * terminating NUL, in @namelen. sizeof() is applied to @str, so it has to be
 * an array such as a string literal. */
#define set_node_name(name, namelen, str)	\
	do {					\
		(name) = (str);			\
		(namelen) = sizeof(str) - 1;	\
	} while (0)
|
|
|
|
|
|
|
|
unsigned char *
|
|
|
|
get_dom_node_name(struct dom_node *node)
|
|
|
|
{
|
|
|
|
unsigned char *name;
|
|
|
|
uint16_t namelen;
|
|
|
|
|
|
|
|
assert(node);
|
|
|
|
|
|
|
|
switch (node->type) {
|
|
|
|
case DOM_NODE_CDATA_SECTION:
|
|
|
|
set_node_name(name, namelen, "#cdata-section");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_COMMENT:
|
|
|
|
set_node_name(name, namelen, "#comment");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_DOCUMENT:
|
|
|
|
set_node_name(name, namelen, "#document");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_DOCUMENT_FRAGMENT:
|
|
|
|
set_node_name(name, namelen, "#document-fragment");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_TEXT:
|
|
|
|
set_node_name(name, namelen, "#text");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_ATTRIBUTE:
|
|
|
|
case DOM_NODE_DOCUMENT_TYPE:
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
|
|
case DOM_NODE_ENTITY:
|
|
|
|
case DOM_NODE_ENTITY_REFERENCE:
|
|
|
|
case DOM_NODE_NOTATION:
|
|
|
|
case DOM_NODE_PROCESSING_INSTRUCTION:
|
|
|
|
default:
|
2005-12-10 12:37:47 -05:00
|
|
|
name = node->string.string;
|
|
|
|
namelen = node->string.length;
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return memacpy(name, namelen);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned char *
|
|
|
|
compress_string(unsigned char *string, unsigned int length)
|
|
|
|
{
|
|
|
|
struct string buffer;
|
|
|
|
unsigned char escape[2] = "\\";
|
|
|
|
|
|
|
|
if (!init_string(&buffer)) return NULL;
|
|
|
|
|
|
|
|
for (; length > 0; string++, length--) {
|
|
|
|
unsigned char *bytes = string;
|
|
|
|
|
|
|
|
if (*string == '\n' || *string == '\r' || *string == '\t') {
|
|
|
|
bytes = escape;
|
|
|
|
escape[1] = *string == '\n' ? 'n'
|
|
|
|
: (*string == '\r' ? 'r' : 't');
|
|
|
|
}
|
|
|
|
|
|
|
|
add_bytes_to_string(&buffer, bytes, bytes == escape ? 2 : 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return buffer.source;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned char *
|
|
|
|
get_dom_node_value(struct dom_node *node, int codepage)
|
|
|
|
{
|
|
|
|
unsigned char *value;
|
|
|
|
uint16_t valuelen;
|
|
|
|
|
|
|
|
assert(node);
|
|
|
|
|
|
|
|
switch (node->type) {
|
|
|
|
case DOM_NODE_ATTRIBUTE:
|
2005-12-10 12:42:54 -05:00
|
|
|
value = node->data.attribute.value.string;
|
|
|
|
valuelen = node->data.attribute.value.length;
|
2005-09-15 09:58:31 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_PROCESSING_INSTRUCTION:
|
2005-12-10 12:59:17 -05:00
|
|
|
value = node->data.proc_instruction.instruction.string;
|
|
|
|
valuelen = node->data.proc_instruction.instruction.length;
|
2005-09-15 09:58:31 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_CDATA_SECTION:
|
|
|
|
case DOM_NODE_COMMENT:
|
|
|
|
case DOM_NODE_TEXT:
|
2005-12-10 12:37:47 -05:00
|
|
|
value = node->string.string;
|
|
|
|
valuelen = node->string.length;
|
2005-09-15 09:58:31 -04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_ENTITY_REFERENCE:
|
2005-12-10 12:37:47 -05:00
|
|
|
value = get_entity_string(node->string.string,
|
|
|
|
node->string.length,
|
2005-09-15 09:58:31 -04:00
|
|
|
codepage);
|
|
|
|
valuelen = value ? strlen(value) : 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DOM_NODE_NOTATION:
|
|
|
|
case DOM_NODE_DOCUMENT:
|
|
|
|
case DOM_NODE_DOCUMENT_FRAGMENT:
|
|
|
|
case DOM_NODE_DOCUMENT_TYPE:
|
|
|
|
case DOM_NODE_ELEMENT:
|
|
|
|
case DOM_NODE_ENTITY:
|
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!value) value = "";
|
|
|
|
|
|
|
|
return compress_string(value, valuelen);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned char *
|
|
|
|
get_dom_node_type_name(enum dom_node_type type)
|
|
|
|
{
|
|
|
|
static unsigned char *dom_node_type_names[DOM_NODES] = {
|
|
|
|
NULL,
|
|
|
|
/* DOM_NODE_ELEMENT */ "element",
|
|
|
|
/* DOM_NODE_ATTRIBUTE */ "attribute",
|
|
|
|
/* DOM_NODE_TEXT */ "text",
|
|
|
|
/* DOM_NODE_CDATA_SECTION */ "cdata-section",
|
|
|
|
/* DOM_NODE_ENTITY_REFERENCE */ "entity-reference",
|
|
|
|
/* DOM_NODE_ENTITY */ "entity",
|
|
|
|
/* DOM_NODE_PROCESSING_INSTRUCTION */ "proc-instruction",
|
|
|
|
/* DOM_NODE_COMMENT */ "comment",
|
|
|
|
/* DOM_NODE_DOCUMENT */ "document",
|
|
|
|
/* DOM_NODE_DOCUMENT_TYPE */ "document-type",
|
|
|
|
/* DOM_NODE_DOCUMENT_FRAGMENT */ "document-fragment",
|
|
|
|
/* DOM_NODE_NOTATION */ "notation",
|
|
|
|
};
|
|
|
|
|
|
|
|
assert(type < DOM_NODES);
|
|
|
|
|
|
|
|
return dom_node_type_names[type];
|
|
|
|
}
|