1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-01-03 14:57:44 -05:00

Add the basic support for rendering RSS documents

It is very simple for now, so it only displays headlines and doesn't
support a whole lot of RSS flavours.
This commit is contained in:
Jonas Fonseca 2006-01-08 03:44:23 +01:00 committed by Jonas Fonseca
parent 2f9c406ef1
commit bc55cd55cb
3 changed files with 304 additions and 13 deletions

View File

@ -24,6 +24,7 @@
#include "document/renderer.h"
#include "dom/scanner.h"
#include "dom/sgml/parser.h"
#include "dom/sgml/rss/rss.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "intl/charsets.h"
@ -54,6 +55,13 @@ struct dom_renderer {
unsigned int find_url:1;
#endif
struct screen_char styles[DOM_NODES];
/* RSS renderer variables */
struct dom_node *channel;
struct dom_node_list *items;
struct dom_node *item;
struct dom_node *node;
struct dom_string text;
};
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
@ -334,7 +342,8 @@ render_dom_text(struct dom_renderer *renderer, struct screen_char *template,
ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
static inline struct link *
add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length,
unsigned char *uristring, int urilength)
{
struct document *document = renderer->document;
int x = renderer->canvas_x;
@ -343,7 +352,6 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
struct link *link;
struct point *point;
struct screen_char template;
unsigned char *uristring;
color_T fgcolor;
if (!realloc_document_links(document, document->nlinks + 1))
@ -355,7 +363,7 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
return NULL;
uristring = convert_string(renderer->convert_table,
string, length, document->options.cp,
uristring, urilength, document->options.cp,
CSM_DEFAULT, NULL, NULL, NULL);
if (!uristring) return NULL;
@ -479,7 +487,7 @@ render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *no
string += offset;
length -= offset;
add_dom_link(renderer, string, matchlen);
add_dom_link(renderer, string, matchlen, string, matchlen);
length -= matchlen;
string += matchlen;
@ -601,7 +609,8 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
break;
}
add_dom_link(renderer, value, valuelen - skips);
add_dom_link(renderer, value, valuelen - skips,
value, valuelen - skips);
if (skips > 0) {
value += valuelen - skips;
@ -682,6 +691,272 @@ static struct dom_stack_context_info dom_source_renderer_context_info = {
};
/* DOM RSS Renderer */
static void
dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
assert(node && renderer && renderer->document);
switch (node->data.element.type) {
case RSS_ELEMENT_CHANNEL:
/* The stack should have: #document * channel */
if (stack->depth != 3)
break;
if (!renderer->channel) {
renderer->channel = node;
}
break;
case RSS_ELEMENT_ITEM:
/* The stack should have: #document * channel item */
#if 0
/* Don't be so strict ... */
if (stack->depth != 4)
break;
#endif
/* ... but be exclusive. */
if (renderer->item)
break;
add_to_dom_node_list(&renderer->items, node, -1);
renderer->item = node;
break;
case RSS_ELEMENT_LINK:
case RSS_ELEMENT_DESCRIPTION:
case RSS_ELEMENT_TITLE:
case RSS_ELEMENT_AUTHOR:
case RSS_ELEMENT_PUBDATE:
if (!node->parent || renderer->node != node->parent)
break;
renderer->node = node;
}
}
static void
dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
struct dom_node_list **list;
assert(node && renderer && renderer->document);
switch (node->data.element.type) {
case RSS_ELEMENT_ITEM:
if (is_dom_string_set(&renderer->text))
done_dom_string(&renderer->text);
renderer->item = NULL;
break;
case RSS_ELEMENT_LINK:
case RSS_ELEMENT_DESCRIPTION:
case RSS_ELEMENT_TITLE:
case RSS_ELEMENT_AUTHOR:
case RSS_ELEMENT_PUBDATE:
if (!is_dom_string_set(&renderer->text)
|| !node->parent
|| renderer->item != node->parent
|| renderer->node != node)
break;
/* Replace any child nodes with the normalized text node. */
list = get_dom_node_list(node->parent, node);
done_dom_node_list(*list);
if (is_dom_string_set(&renderer->text)) {
if (!add_dom_node(node, DOM_NODE_TEXT, &renderer->text))
done_dom_string(&renderer->text);
}
renderer->node = NULL;
break;
default:
break;
}
}
static void
dom_rss_push_content(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
unsigned char *string = node->string.string;
int length = node->string.length;
assert(node && renderer && renderer->document);
if (!renderer->node)
return;
if (node->type == DOM_NODE_ENTITY_REFERENCE) {
string -= 1;
length += 2;
}
if (!is_dom_string_set(&renderer->text)) {
init_dom_string(&renderer->text, string, length);
} else {
add_to_dom_string(&renderer->text, string, length);
}
}
static struct dom_string *
get_rss_node_text(struct dom_node *node)
{
struct dom_node *child;
int index;
if (!node->data.element.children)
return NULL;
foreach_dom_node (node->data.element.children, child, index) {
if (child->type == DOM_NODE_TEXT)
return &child->string;
}
return NULL;
}
static struct dom_node *
get_rss_child(struct dom_node *parent, enum rss_element_type type)
{
struct dom_node *node;
int index;
if (!parent->data.element.children)
return NULL;
foreach_dom_node (parent->data.element.children, node, index) {
if (node->type == DOM_NODE_ELEMENT
&& type == node->data.element.type)
return node;
}
return NULL;
}
static struct dom_string *
get_rss_text(struct dom_node *node, enum rss_element_type type)
{
node = get_rss_child(node, type);
return node ? get_rss_node_text(node) : NULL;
}
static void
render_rss_item(struct dom_renderer *renderer, struct dom_node *item)
{
struct dom_string *title = get_rss_text(item, RSS_ELEMENT_TITLE);
struct dom_string *link = get_rss_text(item, RSS_ELEMENT_LINK);
struct dom_string *author = get_rss_text(item, RSS_ELEMENT_AUTHOR);
struct dom_string *date = get_rss_text(item, RSS_ELEMENT_PUBDATE);
if (title && is_dom_string_set(title)) {
render_dom_text(renderer, &renderer->styles[DOM_NODE_ELEMENT],
title->string, title->length);
}
if (link && is_dom_string_set(link)) {
X(renderer)++;
add_dom_link(renderer, "[link]", 6, link->string, link->length);
}
/* New line, and indent */
Y(renderer)++;
X(renderer) = 0;
if (author && is_dom_string_set(author)) {
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
author->string, author->length);
}
if (date && is_dom_string_set(date)) {
if (author && is_dom_string_set(author)) {
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
" - ", 3);
}
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
date->string, date->length);
}
if ((author && is_dom_string_set(author))
|| (date && is_dom_string_set(date))) {
/* New line, and indent */
Y(renderer)++;
X(renderer) = 0;
}
}
static void
dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
{
struct dom_renderer *renderer = stack->current->data;
if (!renderer->channel)
return;
render_rss_item(renderer, renderer->channel);
if (renderer->items) {
struct dom_node *node;
int index;
foreach_dom_node (renderer->items, node, index) {
Y(renderer)++;
X(renderer) = 0;
render_rss_item(renderer, node);
}
}
if (is_dom_string_set(&renderer->text))
done_dom_string(&renderer->text);
mem_free_if(renderer->items);
done_dom_node(root);
}
static struct dom_stack_context_info dom_rss_renderer_context_info = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_rss_push_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ dom_rss_push_content,
/* DOM_NODE_CDATA_SECTION */ dom_rss_push_content,
/* DOM_NODE_ENTITY_REFERENCE */ dom_rss_push_content,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ dom_rss_pop_element,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ dom_rss_pop_document,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
/* Shared multiplexor between renderers */
void
render_dom_document(struct cache_entry *cached, struct document *document,
@ -692,14 +967,13 @@ render_dom_document(struct cache_entry *cached, struct document *document,
struct conv_table *convert_table;
struct sgml_parser *parser;
enum sgml_document_type doctype;
enum sgml_parser_type parser_type;
unsigned char *string = struri(cached->uri);
size_t length = strlen(string);
struct dom_string uri = INIT_DOM_STRING(string, length);
struct dom_string source = INIT_DOM_STRING(buffer->source, buffer->length);
enum sgml_parser_code code;
assert(document->options.plain);
convert_table = get_convert_table(head, document->options.cp,
document->options.assume_cp,
&document->cp,
@ -710,6 +984,11 @@ render_dom_document(struct cache_entry *cached, struct document *document,
document->bgcolor = document->options.default_bg;
if (document->options.plain)
parser_type = SGML_PARSER_STREAM;
else
parser_type = SGML_PARSER_TREE;
/* FIXME: Refactor the doctype lookup. */
if (!strcasecmp("application/rss+xml", cached->content_type)) {
doctype = SGML_DOCTYPE_RSS;
@ -730,11 +1009,17 @@ render_dom_document(struct cache_entry *cached, struct document *document,
doctype = SGML_DOCTYPE_HTML;
}
parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri, 0);
if (!parser) return;
parser = init_sgml_parser(parser_type, doctype, &uri, 0);
if (!parser) return;
add_dom_stack_context(&parser->stack, &renderer,
&dom_source_renderer_context_info);
if (document->options.plain) {
add_dom_stack_context(&parser->stack, &renderer,
&dom_source_renderer_context_info);
} else if (doctype == SGML_DOCTYPE_RSS) {
add_dom_stack_context(&parser->stack, &renderer,
&dom_rss_renderer_context_info);
}
/* FIXME: When rendering this way we don't really care about the code.
* However, it will be useful when we will be able to also

View File

@ -254,7 +254,13 @@ render_encoded_document(struct cache_entry *cached, struct document *document)
render_plain_document(cached, document, &buffer);
} else {
render_html_document(cached, document, &buffer);
#ifdef CONFIG_DOM
if (cached->content_type
&& (!strlcasecmp("application/rss+xml", 19, cached->content_type, -1)))
render_dom_document(cached, document, &buffer);
else
#endif
render_html_document(cached, document, &buffer);
}
if (encoding != ENCODING_NONE) {

View File

@ -1171,7 +1171,7 @@ struct {
{ "application/xhtml+xml", 0 }, /* RFC 3236 */
#if CONFIG_DOM
{ "application/docbook+xml", 1 },
{ "application/rss+xml", 1 },
{ "application/rss+xml", 0 },
{ "application/xbel+xml", 1 },
{ "application/xbel", 1 },
{ "application/x-xbel", 1 },