From 4c8a30b866e8873e652625227c07f9e924884375 Mon Sep 17 00:00:00 2001
From: Witold Filipczyk
Date: Sun, 12 Mar 2023 20:51:14 +0100
Subject: [PATCH] [libdom] Written dump_document, similar to xml. Next step css.

---
Review notes (this text is ignored by git am):
 - Restored text that was lost from the patch: <map> and the std::map
   template arguments in mapa.cpp, the system includes in renderer2.c and
   the closing-tag output in dump_dom_element_closing().
 - renderer2.c: release the attribute map with dom_namednodemap_unref(),
   check node_name for NULL, keep the dump string in function scope, free
   it on failure and drop the root reference on every path.
 - xml/renderer2.cpp: the first hunk carries one line of context only.

 src/document/Makefile           |   3 +-
 src/document/libdom/Makefile    |   6 +
 src/document/libdom/mapa.cpp    |  37 ++++
 src/document/libdom/mapa.h      |  16 ++
 src/document/libdom/meson.build |   1 +
 src/document/libdom/renderer2.c | 328 ++++++++++++++++++++++++++++++++
 src/document/meson.build        |   7 +-
 src/document/xml/renderer2.cpp  |   3 +-
 8 files changed, 397 insertions(+), 4 deletions(-)
 create mode 100644 src/document/libdom/Makefile
 create mode 100644 src/document/libdom/mapa.cpp
 create mode 100644 src/document/libdom/mapa.h
 create mode 100644 src/document/libdom/meson.build
 create mode 100644 src/document/libdom/renderer2.c

diff --git a/src/document/Makefile b/src/document/Makefile
index f790bab0d..6d272fdca 100644
--- a/src/document/Makefile
+++ b/src/document/Makefile
@@ -2,8 +2,9 @@ top_builddir=../..
 include $(top_builddir)/Makefile.config
 
 SUBDIRS-$(CONFIG_CSS) += css
-SUBDIRS-$(CONFIG_LIBCSS) += css2
 SUBDIRS-$(CONFIG_DOM) += dom
+SUBDIRS-$(CONFIG_LIBCSS) += css2
+SUBDIRS-$(CONFIG_LIBDOM) += libdom
 SUBDIRS-$(CONFIG_XML) += xml
 SUBDIRS = gemini html plain
 
diff --git a/src/document/libdom/Makefile b/src/document/libdom/Makefile
new file mode 100644
index 000000000..4915d9e10
--- /dev/null
+++ b/src/document/libdom/Makefile
@@ -0,0 +1,6 @@
+top_builddir=../../..
+include $(top_builddir)/Makefile.config
+
+OBJS = mapa.obj renderer2.o
+
+include $(top_srcdir)/Makefile.lib
diff --git a/src/document/libdom/mapa.cpp b/src/document/libdom/mapa.cpp
new file mode 100644
--- /dev/null
+++ b/src/document/libdom/mapa.cpp
@@ -0,0 +1,37 @@
+/* map temporary file */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <map>
+
+#include "document/libdom/mapa.h"
+
+/* The map goes from an int key (the renderer passes an offset into the
+ * dumped text) to an opaque node pointer. C code only sees it as void *. */
+
+/* Store @node under the key @length in the map @m. */
+void
+save_in_map(void *m, void *node, int length)
+{
+	std::map<int, void *> *mapa = static_cast<std::map<int, void *> *>(m);
+	(*mapa)[length] = node;
+}
+
+/* Allocate a new, empty map. */
+void *
+create_new_element_map(void)
+{
+	std::map<int, void *> *mapa = new std::map<int, void *>;
+
+	return (void *)mapa;
+}
+
+/* Remove all entries from the map @m; the map itself stays allocated. */
+void
+clear_map(void *m)
+{
+	std::map<int, void *> *mapa = static_cast<std::map<int, void *> *>(m);
+	mapa->clear();
+}
diff --git a/src/document/libdom/mapa.h b/src/document/libdom/mapa.h
new file mode 100644
index 000000000..52e430ea4
--- /dev/null
+++ b/src/document/libdom/mapa.h
@@ -0,0 +1,16 @@
+#ifndef EL__DOCUMENT_LIBDOM_MAPA_H
+#define EL__DOCUMENT_LIBDOM_MAPA_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void save_in_map(void *m, void *node, int length);
+void *create_new_element_map(void);
+void clear_map(void *m);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/document/libdom/meson.build b/src/document/libdom/meson.build
new file mode 100644
index 000000000..111280d0e
--- /dev/null
+++ b/src/document/libdom/meson.build
@@ -0,0 +1 @@
+srcs += files('mapa.cpp', 'renderer2.c')
diff --git a/src/document/libdom/renderer2.c b/src/document/libdom/renderer2.c
new file mode 100644
--- /dev/null
+++ b/src/document/libdom/renderer2.c
@@ -0,0 +1,328 @@
+/* libdom to text document renderer */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <dom/dom.h>
+
+#include "elinks.h"
+
+#include "cache/cache.h"
+#include "document/document.h"
+#include "document/renderer.h"
+#include "document/html/renderer.h"
+#include "document/libdom/mapa.h"
+#include "document/xml/renderer2.h"
+#include "ecmascript/ecmascript.h"
+
+
+/* Append the closing tag "</name>" of the element @node to @buf.
+ * Nodes that are not elements produce no output.
+ * Returns false when libdom reports an error, true otherwise. */
+static bool
+dump_dom_element_closing(struct string *buf, dom_node *node)
+{
+	dom_exception exc;
+	dom_string *node_name = NULL;
+	dom_node_type type;
+
+	/* Only interested in element nodes */
+	exc = dom_node_get_node_type(node, &type);
+
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for node_get_node_type\n");
+		return false;
+	} else {
+		if (type != DOM_ELEMENT_NODE) {
+			/* Nothing to print */
+			return true;
+		}
+	}
+
+	/* Get element name */
+	exc = dom_node_get_node_name(node, &node_name);
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for get_node_name\n");
+		return false;
+	} else if (node_name == NULL) {
+		fprintf(stderr, "Broken: root_name == NULL\n");
+		return false;
+	}
+
+	/* Get string data and print element name */
+	add_to_string(buf, "</");
+	add_bytes_to_string(buf, dom_string_data(node_name), dom_string_byte_length(node_name));
+	add_char_to_string(buf, '>');
+
+	/* Finished with the node_name dom_string */
+	dom_string_unref(node_name);
+
+	return true;
+}
+
+/* Append the attribute node @node to @buf as ' name="value"'.
+ * An attribute without a value is skipped.
+ * NOTE(review): the value is copied verbatim, without escaping quotes.
+ * Returns false when libdom reports an error, true otherwise. */
+static bool
+dump_node_element_attribute(struct string *buf, dom_node *node)
+{
+	dom_exception exc;
+	dom_string *attr = NULL;
+	dom_string *attr_value = NULL;
+
+	exc = dom_attr_get_name((struct dom_attr *)node, &attr);
+
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for attr_get_name\n");
+		return false;
+	}
+
+	/* Get attribute's value */
+	exc = dom_attr_get_value((struct dom_attr *)node, &attr_value);
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for attr_get_value\n");
+		dom_string_unref(attr);
+		return false;
+	} else if (attr_value == NULL) {
+		/* Element lacks required attribute */
+		dom_string_unref(attr);
+		return true;
+	}
+
+	add_char_to_string(buf, ' ');
+	add_bytes_to_string(buf, dom_string_data(attr), dom_string_byte_length(attr));
+	add_to_string(buf, "=\"");
+	add_bytes_to_string(buf, dom_string_data(attr_value), dom_string_byte_length(attr_value));
+	add_char_to_string(buf, '"');
+
+	/* Finished with the attr dom_string */
+	dom_string_unref(attr);
+	dom_string_unref(attr_value);
+
+	return true;
+}
+
+
+/* Append the opening part of @node to @buf.
+ * A text node is copied as is, unless it is a lone newline. An element is
+ * written as "<name attr="value"...>" and the offset of its name in @buf is
+ * stored in @mapa together with the node. Other node types are ignored.
+ * @depth is currently unused.
+ * Returns false when libdom reports an error, true otherwise. */
+static bool
+dump_dom_element(void *mapa, struct string *buf, dom_node *node, int depth)
+{
+	dom_exception exc;
+	dom_string *node_name = NULL;
+	dom_node_type type;
+	dom_namednodemap *attrs = NULL;
+
+	/* Only interested in element nodes */
+	exc = dom_node_get_node_type(node, &type);
+
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for node_get_node_type\n");
+		return false;
+	} else {
+		if (type == DOM_TEXT_NODE) {
+			dom_string *str;
+
+			exc = dom_node_get_text_content(node, &str);
+
+			if (exc == DOM_NO_ERR && str != NULL) {
+				int length = dom_string_byte_length(str);
+				const char *string_text = dom_string_data(str);
+
+				if (!((length == 1) && (*string_text == '\n'))) {
+					add_bytes_to_string(buf, string_text, length);
+				}
+				dom_string_unref(str);
+			}
+			return true;
+		}
+		if (type != DOM_ELEMENT_NODE) {
+			/* Nothing to print */
+			return true;
+		}
+	}
+
+	/* Get element name */
+	exc = dom_node_get_node_name(node, &node_name);
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for get_node_name\n");
+		return false;
+	} else if (node_name == NULL) {
+		/* Same guard as in dump_dom_element_closing() */
+		fprintf(stderr, "Broken: node_name == NULL\n");
+		return false;
+	}
+
+	add_char_to_string(buf, '<');
+	save_in_map(mapa, node, buf->length);
+
+	/* Get string data and print element name */
+	add_bytes_to_string(buf, dom_string_data(node_name), dom_string_byte_length(node_name));
+
+	exc = dom_node_get_attributes(node, &attrs);
+
+	if (exc == DOM_NO_ERR && attrs != NULL) {
+		dom_ulong length;
+
+		exc = dom_namednodemap_get_length(attrs, &length);
+
+		if (exc == DOM_NO_ERR) {
+			dom_ulong i;
+
+			for (i = 0; i < length; ++i) {
+				dom_node *attr = NULL;
+
+				exc = dom_namednodemap_item(attrs, i, &attr);
+
+				if (exc == DOM_NO_ERR && attr != NULL) {
+					dump_node_element_attribute(buf, attr);
+					dom_node_unref(attr);
+				}
+			}
+		}
+		/* attrs is a dom_namednodemap, not a dom_node */
+		dom_namednodemap_unref(attrs);
+	}
+	add_char_to_string(buf, '>');
+
+	/* Finished with the node_name dom_string */
+	dom_string_unref(node_name);
+
+	return true;
+}
+
+/* Dump @node and, recursively, all its children to @buf: the opening
+ * part, then the children in document order, then the closing tag.
+ * @start is currently unused; @depth grows by one per level.
+ * Returns false as soon as a node cannot be dumped or walked. */
+static bool
+walk_tree(void *mapa, struct string *buf, dom_node *node, bool start, int depth)
+{
+	dom_exception exc;
+	dom_node *child;
+
+	/* Print this node's entry */
+	if (dump_dom_element(mapa, buf, node, depth) == false) {
+		/* There was an error; return */
+		return false;
+	}
+
+	/* Get the node's first child */
+	exc = dom_node_get_first_child(node, &child);
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for node_get_first_child\n");
+		return false;
+	} else if (child != NULL) {
+		/* node has children; descend to children's depth */
+		depth++;
+
+		/* Loop through all node's children */
+		do {
+			dom_node *next_child;
+
+			/* Visit node's descendants */
+			if (walk_tree(mapa, buf, child, false, depth) == false) {
+				/* There was an error; return */
+				dom_node_unref(child);
+				return false;
+			}
+
+			/* Go to next sibling */
+			exc = dom_node_get_next_sibling(child, &next_child);
+			if (exc != DOM_NO_ERR) {
+				fprintf(stderr, "Exception raised for "
+						"node_get_next_sibling\n");
+				dom_node_unref(child);
+				return false;
+			}
+
+			dom_node_unref(child);
+			child = next_child;
+		} while (child != NULL); /* No more children */
+	}
+	/* Best effort: a failure to write the closing tag is not fatal */
+	dump_dom_element_closing(buf, node);
+
+	return true;
+}
+
+/* Render @document using its libdom tree.
+ * The tree is parsed on first use. When @buffer is NULL, the tree is first
+ * dumped to text, which also becomes document->text and refills
+ * document->element_map. The text is then passed to
+ * render_html_document(). */
+void
+render_xhtml_document(struct cache_entry *cached, struct document *document, struct string *buffer)
+{
+	dom_exception exc; /* returned by libdom functions */
+	dom_document *doc = NULL; /* document, loaded into libdom */
+	dom_node *root = NULL; /* root element of document */
+	void *mapa = NULL;
+	/* Must outlive the block below: buffer may point at it */
+	struct string tt;
+
+	if (!document->dom) {
+		(void)get_convert_table(cached->head ?: (char *)"", document->options.cp,
+			document->options.assume_cp,
+			&document->cp,
+			&document->cp_status,
+			document->options.hard_assume);
+
+		document->dom = document_parse(document);
+	}
+
+	if (!document->dom) {
+		return;
+	}
+
+	doc = document->dom;
+
+	/* Get root element */
+	exc = dom_document_get_document_element(doc, &root);
+	if (exc != DOM_NO_ERR) {
+		fprintf(stderr, "Exception raised for get_document_element\n");
+		//dom_node_unref(doc);
+		return;
+	} else if (root == NULL) {
+		fprintf(stderr, "Broken: root == NULL\n");
+		//dom_node_unref(doc);
+		return;
+	}
+
+	if (!buffer) {
+		if (!init_string(&tt)) {
+			dom_node_unref(root);
+			return;
+		}
+		mapa = document->element_map;
+
+		if (!mapa) {
+			mapa = create_new_element_map();
+			document->element_map = (void *)mapa;
+		} else {
+			clear_map(mapa);
+		}
+
+		if (walk_tree(mapa, &tt, root, true, 0) == false) {
+			fprintf(stderr, "Failed to complete DOM structure dump.\n");
+			/* Nothing took ownership of the partial dump */
+			done_string(&tt);
+			dom_node_unref(root);
+			//dom_node_unref(doc);
+			return;
+		}
+		buffer = &tt;
+		document->text = tt.source;
+	}
+	/* The reference is dropped on every path, not only after a dump */
+	dom_node_unref(root);
+	render_html_document(cached, document, buffer);
+}
diff --git a/src/document/meson.build b/src/document/meson.build
index 21938cf4c..3c69a0615 100644
--- a/src/document/meson.build
+++ b/src/document/meson.build
@@ -1,11 +1,14 @@
 if conf_data.get('CONFIG_CSS')
     subdir('css')
 endif
+if conf_data.get('CONFIG_DOM')
+    subdir('dom')
+endif
 if conf_data.get('CONFIG_LIBCSS')
     subdir('css2')
 endif
-if conf_data.get('CONFIG_DOM')
-    subdir('dom')
+if conf_data.get('CONFIG_LIBDOM')
+    subdir('libdom')
 endif
 subdir('gemini')
 subdir('html')
diff --git a/src/document/xml/renderer2.cpp b/src/document/xml/renderer2.cpp
index f5447d3c0..3381d5296 100644
--- a/src/document/xml/renderer2.cpp
+++ b/src/document/xml/renderer2.cpp
@@ -42,3 +42,3 @@
 
-
+#ifndef CONFIG_LIBDOM
 static void
@@ -153,3 +153,4 @@ render_xhtml_document(struct cache_entry *cached, struct document *document, str
 	}
 	render_html_document(cached, document, buffer);
 }
+#endif