mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Make URLs inside CDATA, comment and text nodes accessible
This depends on the <regex.h> system header being available, and it can be enabled via the document.plain.display_links option. The URL regex was supplied by yanek.
This commit is contained in:
parent
dda6064b47
commit
4fa0937ca5
@ -4,6 +4,10 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <sys/types.h> /* FreeBSD needs this before regex.h */
|
||||||
|
#ifdef HAVE_REGEX_H
|
||||||
|
#include <regex.h>
|
||||||
|
#endif
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "elinks.h"
|
#include "elinks.h"
|
||||||
@ -45,9 +49,15 @@ struct dom_renderer {
|
|||||||
unsigned char *position;
|
unsigned char *position;
|
||||||
int canvas_x, canvas_y;
|
int canvas_x, canvas_y;
|
||||||
|
|
||||||
|
#ifdef HAVE_REGEX_H
|
||||||
|
regex_t url_regex;
|
||||||
|
unsigned int find_url:1;
|
||||||
|
#endif
|
||||||
struct screen_char styles[DOM_NODES];
|
struct screen_char styles[DOM_NODES];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* POSIX extended regular expression used to spot URLs in plain text:
 * an ftp, http or https scheme, a host name ending in a dot followed by
 * 2-4 letters, and any number of optional '/'-introduced path segments.
 * The scheme part is "https?" so that https:// URLs are recognised too,
 * not only ftp:// and http:// ones. */
#define URL_REGEX "(ftp|https?)://[[:alnum:]]+([-_\\.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(\\/{1}[-_~&=;\\?\\.a-z0-9]*)*"

/* Flags for regcomp(): match case-insensitively, using extended syntax. */
#define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
init_template(struct screen_char *template, struct document_options *options,
|
init_template(struct screen_char *template, struct document_options *options,
|
||||||
@ -91,6 +101,16 @@ init_dom_renderer(struct dom_renderer *renderer, struct document *document,
|
|||||||
renderer->end = buffer->source + buffer->length;
|
renderer->end = buffer->source + buffer->length;
|
||||||
renderer->position = renderer->source;
|
renderer->position = renderer->source;
|
||||||
|
|
||||||
|
#ifdef HAVE_REGEX_H
|
||||||
|
if (renderer->document->options.plain_display_links) {
|
||||||
|
if (regcomp(&renderer->url_regex, URL_REGEX, URL_REGFLAGS)) {
|
||||||
|
regfree(&renderer->url_regex);
|
||||||
|
} else {
|
||||||
|
renderer->find_url = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (type = 0; type < DOM_NODES; type++) {
|
for (type = 0; type < DOM_NODES; type++) {
|
||||||
struct screen_char *template = &renderer->styles[type];
|
struct screen_char *template = &renderer->styles[type];
|
||||||
color_T background = document->options.default_bg;
|
color_T background = document->options.default_bg;
|
||||||
@ -401,6 +421,46 @@ render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template
|
|||||||
render_dom_text(renderer, template, string, length);
|
render_dom_text(renderer, template, string, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_REGEX_H
|
||||||
|
static inline void
|
||||||
|
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
|
||||||
|
{
|
||||||
|
regex_t *regex = &renderer->url_regex;
|
||||||
|
regmatch_t regmatch;
|
||||||
|
unsigned char *string = node->string.string;
|
||||||
|
int length = node->string.length;
|
||||||
|
struct screen_char *template = &renderer->styles[node->type];
|
||||||
|
|
||||||
|
if (check_dom_node_source(renderer, string, length)) {
|
||||||
|
render_dom_flush(renderer, string);
|
||||||
|
renderer->position = string + length;
|
||||||
|
assert_source(renderer, renderer->position, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (length > 0 && !regexec(regex, string, 1, ®match, 0)) {
|
||||||
|
int matchlen = regmatch.rm_eo - regmatch.rm_so;
|
||||||
|
int offset = regmatch.rm_so;
|
||||||
|
|
||||||
|
if (!matchlen || offset < 0 || regmatch.rm_eo > length)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (offset > 0)
|
||||||
|
render_dom_text(renderer, template, string, offset);
|
||||||
|
|
||||||
|
string += offset;
|
||||||
|
length -= offset;
|
||||||
|
|
||||||
|
add_dom_link(renderer, string, matchlen);
|
||||||
|
|
||||||
|
length -= matchlen;
|
||||||
|
string += matchlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length > 0)
|
||||||
|
render_dom_text(renderer, template, string, length);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void
|
static void
|
||||||
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||||
{
|
{
|
||||||
@ -408,8 +468,16 @@ render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *dat
|
|||||||
|
|
||||||
assert(node && renderer && renderer->document);
|
assert(node && renderer && renderer->document);
|
||||||
|
|
||||||
/* TODO: For (atleast) text, CDATA section and comment nodes check
|
#ifdef HAVE_REGEX_H
|
||||||
* for URIs ala document->options.plain_display_links */
|
if (renderer->find_url
|
||||||
|
&& (node->type == DOM_NODE_TEXT
|
||||||
|
|| node->type == DOM_NODE_CDATA_SECTION
|
||||||
|
|| node->type == DOM_NODE_COMMENT)) {
|
||||||
|
render_dom_node_enhanced_text(renderer, node);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
render_dom_node_text(renderer, &renderer->styles[node->type], node);
|
render_dom_node_text(renderer, &renderer->styles[node->type], node);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -634,5 +702,9 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
|||||||
pop_dom_node(&parser->stack);
|
pop_dom_node(&parser->stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_REGEX_H
|
||||||
|
if (renderer.find_url)
|
||||||
|
regfree(&renderer.url_regex);
|
||||||
|
#endif
|
||||||
done_sgml_parser(parser);
|
done_sgml_parser(parser);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user