1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-10-08 05:04:16 -04:00
elinks/src/dom/configuration.c

346 lines
8.4 KiB
C

/* DOM Configuration */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "elinks.h"
#include "dom/configuration.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "dom/string.h"
static enum dom_code
normalize_text_node_whitespace(struct dom_node *node)
{
unsigned char buf[256];
struct dom_string string = INIT_DOM_STRING(NULL, 0);
int count = 0, i = 0;
unsigned char *text = node->string.string;
assert(node->type == DOM_NODE_TEXT);
while (i < node->string.length) {
int j;
for (j = 0; j < sizeof(buf) && i < node->string.length; i++) {
unsigned char data = text[i];
if (isspace(data)) {
if (count == 1)
continue;
data = ' ';
count = 1;
} else {
count = 0;
}
buf[j++] = data;
}
if (!add_to_dom_string(&string, buf, j)) {
done_dom_string(&string);
return DOM_CODE_ALLOC_ERR;
}
}
if (node->allocated)
done_dom_string(&node->string);
set_dom_string(&node->string, string.string, string.length);
node->allocated = 1;
return DOM_CODE_OK;
}
/* Merge the text of @node into the preceding text node, or turn @node itself
 * into a text node when there is no preceding text node.
 *
 * Handles DOM_NODE_TEXT, DOM_NODE_CDATA_SECTION and DOM_NODE_ENTITY_REFERENCE
 * nodes. Entity references are appended unexpanded, as "&name;".
 *
 * @config  supplies the flags; DOM_CONFIG_NORMALIZE_WHITESPACE triggers
 *          whitespace normalization of the resulting text.
 * @node    the node whose text is appended or converted.
 *
 * Returns:
 *  - DOM_CODE_FREE_NODE when the text was appended to the previous text node
 *    (the appended node is no longer needed);
 *  - DOM_CODE_OK when @node was kept, either untouched (a text node with no
 *    previous text node) or converted in place to a text node;
 *  - DOM_CODE_ALLOC_ERR when building the string failed. */
static enum dom_code
append_node_text(struct dom_config *config, struct dom_node *node)
{
	struct dom_node *prev = get_dom_node_prev(node);
	size_t length;
	struct dom_string dest;
	struct dom_string src;
	int error = 0;

	copy_struct(&src, &node->string);

	if (!prev || prev->type != DOM_NODE_TEXT) {
		/* Preserve text nodes with no one to append to. */
		if (node->type == DOM_NODE_TEXT)
			return DOM_CODE_OK;

		prev = NULL;
		set_dom_string(&dest, NULL, 0);

	} else {
		if (prev->allocated) {
			copy_struct(&dest, &prev->string);
		} else {
			/* The previous node does not own its text yet; give it
			 * a private copy that can be grown. */
			set_dom_string(&dest, NULL, 0);
			if (!add_to_dom_string(&dest, prev->string.string, prev->string.length))
				return DOM_CODE_ALLOC_ERR;
			set_dom_string(&prev->string, dest.string, dest.length);
			prev->allocated = 1;
		}
	}

	/* Remember the length so a failed append can be rolled back. */
	length = dest.length;

	switch (node->type) {
	case DOM_NODE_CDATA_SECTION:
	case DOM_NODE_TEXT:
		if (!add_to_dom_string(&dest, src.string, src.length))
			error = 1;
		break;

	case DOM_NODE_ENTITY_REFERENCE:
		/* FIXME: Until we will have uniform encoding at this point
		 * (UTF-8) we just add the entity reference unexpanded assuming
		 * that convert_string() will eventually do the work of
		 * expanding it. */
		if (!add_to_dom_string(&dest, "&", 1)
		    || !add_to_dom_string(&dest, src.string, src.length)
		    || !add_to_dom_string(&dest, ";", 1)) {
			error = 1;
		}
		break;

	default:
		INTERNAL("Cannot append from node %d", node->type);
	}

	if (error) {
		if (prev) {
			/* An earlier, successful step of a multi-part append
			 * may already have moved the buffer, so the previous
			 * node must take over the current pointer as well as
			 * the restored length. */
			prev->string.string = dest.string;
			prev->string.length = length;
		} else {
			done_dom_string(&dest);
		}
		return DOM_CODE_ALLOC_ERR;
	}

	if (prev) {
		copy_struct(&prev->string, &dest);

		if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
		    && node->type != DOM_NODE_ENTITY_REFERENCE) {
			/* XXX: Ignore errors since we want to always
			 * free the appended node at this point. */
			normalize_text_node_whitespace(prev);
		}

		return DOM_CODE_FREE_NODE;

	} else {
		int was_cdata_section = node->type == DOM_NODE_CDATA_SECTION;

		/* The old text has been copied into dest; release it if the
		 * node owned it so that it is not leaked when node->string
		 * is replaced below. */
		if (node->allocated)
			done_dom_string(&src);

		node->type = DOM_NODE_TEXT;
		memset(&node->data, 0, sizeof(node->data));
		node->allocated = 1;
		copy_struct(&node->string, &dest);

		if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
		    && was_cdata_section) {
			/* XXX: Ignore errors since we want to always ok the
			 * append. */
			normalize_text_node_whitespace(node);
		}

		return DOM_CODE_OK;
	}
}
/* Pop callback of the normalizer context: decide what happens to @node once
 * it has been fully processed.
 *
 * The configuration is taken from the current stack context data. Depending
 * on the node type and the configuration flags the node is kept
 * (DOM_CODE_OK), dropped (DOM_CODE_FREE_NODE), or handed to
 * append_node_text(), whose result is returned as-is. The @data argument is
 * not used. */
static enum dom_code
dom_normalize_node_end(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_config *config = stack->current->data;

	switch (node->type) {
	case DOM_NODE_ELEMENT:
		/* Drop elements that are not known from the built-in node
		 * info. */
		if ((config->flags & DOM_CONFIG_UNKNOWN)
		    && !node->data.element.type)
			return DOM_CODE_FREE_NODE;
		return DOM_CODE_OK;

	case DOM_NODE_ATTRIBUTE:
		/* Drop attributes that are not known from the built-in node
		 * info. */
		if ((config->flags & DOM_CONFIG_UNKNOWN)
		    && !node->data.attribute.type)
			return DOM_CODE_FREE_NODE;
		return DOM_CODE_OK;

	case DOM_NODE_PROCESSING_INSTRUCTION:
		/* Drop processing instructions that are not known from the
		 * built-in node info. */
		if ((config->flags & DOM_CONFIG_UNKNOWN)
		    && !node->data.proc_instruction.type)
			return DOM_CODE_FREE_NODE;
		return DOM_CODE_OK;

	case DOM_NODE_TEXT:
		/* Text nodes are merged with a preceding text node unless
		 * they consist only of whitespace and element content
		 * whitespace is not being kept; those are discarded. */
		if ((config->flags & DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE)
		    || !node->data.text.only_space)
			return append_node_text(config, node);
		return DOM_CODE_FREE_NODE;

	case DOM_NODE_COMMENT:
		/* Discard all comments unless they were asked for. */
		if (config->flags & DOM_CONFIG_COMMENTS)
			return DOM_CODE_OK;
		return DOM_CODE_FREE_NODE;

	case DOM_NODE_CDATA_SECTION:
		if (config->flags & DOM_CONFIG_CDATA_SECTIONS)
			return DOM_CODE_OK;
		/* Transform CDATASection nodes into Text nodes. The new Text
		 * node is then combined with any adjacent Text node. */
		return append_node_text(config, node);

	case DOM_NODE_ENTITY_REFERENCE:
		if (config->flags & DOM_CONFIG_ENTITIES)
			return DOM_CODE_OK;
		/* Remove all EntityReference nodes from the document,
		 * putting the entity expansions directly in their place. Text
		 * nodes are normalized. Only unexpanded entity references are
		 * kept in the document. */
		return append_node_text(config, node);

	case DOM_NODE_DOCUMENT:
	default:
		return DOM_CODE_OK;
	}
}
/* Push callback for text nodes: normalize the whitespace of @node when the
 * configuration found in the current stack context has
 * DOM_CONFIG_NORMALIZE_WHITESPACE set.
 *
 * Returns the result of normalize_text_node_whitespace() in that case and
 * DOM_CODE_OK otherwise. The @data argument is not used. */
enum dom_code
dom_normalize_text(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_config *config = stack->current->data;

	if (!(config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE))
		return DOM_CODE_OK;

	/* Normalize whitespace in the text. */
	return normalize_text_node_whitespace(node);
}
/* Stack context description used by add_dom_config_normalizer().
 *
 * The initializer is positional: an object size followed by one table of push
 * callbacks and one table of pop callbacks. Going by the slot labels the
 * tables are indexed by node type, with an unlabelled first slot -- confirm
 * the order against the node type enum before editing.
 *
 * Only text nodes have a push callback (dom_normalize_text); every labelled
 * node type is routed through dom_normalize_node_end on pop. */
static struct dom_stack_context_info dom_config_normalizer_context = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_normalize_text,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROCESSING_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_normalize_node_end,
/* DOM_NODE_ATTRIBUTE */ dom_normalize_node_end,
/* DOM_NODE_TEXT */ dom_normalize_node_end,
/* DOM_NODE_CDATA_SECTION */ dom_normalize_node_end,
/* DOM_NODE_ENTITY_REFERENCE */ dom_normalize_node_end,
/* DOM_NODE_ENTITY */ dom_normalize_node_end,
/* DOM_NODE_PROCESSING_INSTRUCTION */ dom_normalize_node_end,
/* DOM_NODE_COMMENT */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT_TYPE */ dom_normalize_node_end,
/* DOM_NODE_DOCUMENT_FRAGMENT */ dom_normalize_node_end,
/* DOM_NODE_NOTATION */ dom_normalize_node_end,
}
};
/* Reset @config, store @flags in it and register it on @stack together with
 * the normalizer context.
 *
 * Returns @config when add_dom_stack_context() succeeds and NULL otherwise.
 * Note that @config has been overwritten in either case. */
struct dom_config *
add_dom_config_normalizer(struct dom_stack *stack, struct dom_config *config,
			  enum dom_config_flag flags)
{
	memset(config, 0, sizeof(*config));
	config->flags = flags;

	if (!add_dom_stack_context(stack, config, &dom_config_normalizer_context))
		return NULL;

	return config;
}
/* Pairs the textual name of a DOM configuration option with its flag value. */
struct dom_config_info {
/* Option name; compared case-insensitively by get_dom_config_flag(). */
struct dom_string name;
/* Flag value returned when the name matches. */
enum dom_config_flag flag;
};
/* Expands to a struct dom_config_info initializer: @name wrapped with
 * STATIC_DOM_STRING() followed by @flag. */
#define DOM_CONFIG(name, flag) \
{ STATIC_DOM_STRING(name), (flag) }
/* Option names recognized by get_dom_config_flag() and, through it, by
 * parse_dom_config(). The table is searched linearly from the top. */
static struct dom_config_info dom_config_info[] = {
DOM_CONFIG("cdata-sections", DOM_CONFIG_CDATA_SECTIONS),
DOM_CONFIG("comments", DOM_CONFIG_COMMENTS),
DOM_CONFIG("element-content-whitespace",DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE),
DOM_CONFIG("entities", DOM_CONFIG_ENTITIES),
DOM_CONFIG("normalize-characters", DOM_CONFIG_NORMALIZE_CHARACTERS),
DOM_CONFIG("unknown", DOM_CONFIG_UNKNOWN),
DOM_CONFIG("normalize-whitespace", DOM_CONFIG_NORMALIZE_WHITESPACE),
};
static enum dom_config_flag
get_dom_config_flag(struct dom_string *name)
{
int i;
for (i = 0; i < sizeof_array(dom_config_info); i++)
if (!dom_string_casecmp(&dom_config_info[i].name, name))
return dom_config_info[i].flag;
return 0;
}
/* Parse a list of configuration option names into a set of flags.
 *
 * @flaglist   NUL-terminated list of option names; NULL yields 0.
 * @separator  character separating the names; when it is 0 the whole of
 *             @flaglist is treated as a single name.
 *
 * Each name is looked up with get_dom_config_flag() and the results are OR-ed
 * together. Names that are not recognized (including empty ones, such as
 * those produced by a trailing separator) contribute nothing.
 *
 * Returns the combined flags. */
enum dom_config_flag
parse_dom_config(unsigned char *flaglist, unsigned char separator)
{
	enum dom_config_flag flags = 0;

	while (flaglist) {
		/* The <string.h> functions work on plain char; convert
		 * explicitly in both directions rather than relying on
		 * implicit conversion between differently signed pointers. */
		unsigned char *end = separator
			? (unsigned char *) strchr((const char *) flaglist, separator)
			: NULL;
		int length = end ? (int) (end - flaglist)
				 : (int) strlen((const char *) flaglist);
		struct dom_string name = INIT_DOM_STRING(flaglist, length);

		flags |= get_dom_config_flag(&name);

		/* Continue after the separator, or stop at the last name. */
		flaglist = end ? end + 1 : NULL;
	}

	return flags;
}