mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Merge with http://elinks.cz/elinks.git
This commit is contained in:
commit
2cfd0a9bb4
@ -33,8 +33,8 @@ mcmd = @$(if $($(mquiet)cmd_$(1)),echo $($(mquiet)cmd_$(1)) &&) $(cmd_$(1))
|
||||
ecmd = @$(if $($(mquiet)cmd_$(1)),printf "%-38s " $($(mquiet)cmd_$(1)) &&) $(cmd_$(1))
|
||||
|
||||
quiet_cmd_compile = ' [$(CC_COLOR)CC$(END_COLOR)] $(RELPATH)$@'
|
||||
masq_cmd_compile = $(COMPILE) -c $<
|
||||
cmd_compile = $(COMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
|
||||
masq_cmd_compile = $(COMPILE) -o $(@) -c $< $(2)
|
||||
cmd_compile = $(COMPILE) -o $(@) -Wp,-MD,.deps/$(*F).pp -c $< $(2)
|
||||
|
||||
# Rule to compile a set of .o files into one .o file
|
||||
quiet_cmd_ld_objs = " [$(LD_COLOR)LD$(END_COLOR)] $(RELPATH)$@"
|
||||
@ -153,6 +153,7 @@ clean-test:
|
||||
test-default:
|
||||
|
||||
ifdef TEST_PROGS
|
||||
TESTDEPS-$(CONFIG_DEBUG) += $(top_builddir)/src/util/memdebug.o
|
||||
TESTDEPS += $(TESTDEPS-yes)
|
||||
|
||||
TESTS = $(wildcard $(srcdir)test-*)
|
||||
|
122
doc/Makefile
122
doc/Makefile
@ -14,6 +14,7 @@ TXT_DIR = $(top_srcdir)/doc/txt
|
||||
|
||||
DOC_DIRS = \
|
||||
$(HTML_DIR) \
|
||||
$(HTML_DIR)/api \
|
||||
$(MAN_DIR)/man1 \
|
||||
$(MAN_DIR)/man5 \
|
||||
$(TXT_DIR) \
|
||||
@ -51,74 +52,43 @@ FEATURES = $(top_srcdir)/features.conf
|
||||
### Scripts
|
||||
#
|
||||
|
||||
CODE2DOC = $(top_srcdir)/doc/tools/code2doc
|
||||
HELP2DOC = $(top_srcdir)/doc/tools/help2doc
|
||||
IMPORT_FEATURES_CONF = $(top_srcdir)/doc/tools/import-features.conf
|
||||
MAKE_ELINKS_MANPAGE = $(top_srcdir)/doc/tools/make-elinks-manpage
|
||||
MAKE_ELINKSKEYS_MANPAGE = $(top_srcdir)/doc/tools/make-elinkskeys-manpage
|
||||
|
||||
ifeq ($(CONFIG_ASCIIDOC),yes)
|
||||
HTML_DOCS_WITH_ASCIIDOC = \
|
||||
$(HTML_DIR)/elinks.1.html \
|
||||
$(HTML_DIR)/elinkskeys.5.html \
|
||||
$(HTML_DIR)/hacking.html \
|
||||
$(HTML_DIR)/manual.html
|
||||
endif
|
||||
HTML_DOCS-$(CONFIG_ASCIIDOC) += \
|
||||
api/dom-sgml-parser.html \
|
||||
elinks.1.html \
|
||||
elinkskeys.5.html \
|
||||
hacking.html \
|
||||
manual.html
|
||||
|
||||
HTML_DOCS-$(CONFIG_XMLTO) += \
|
||||
manual.html-chunked
|
||||
|
||||
HTML_DOCS-$(CONFIG_POD2HTML) += \
|
||||
perl.html \
|
||||
perl-hooks.html
|
||||
|
||||
MAN_DOCS-$(CONFIG_XMLTO) += \
|
||||
man1/elinks.1.in \
|
||||
man5/elinkskeys.5
|
||||
|
||||
# Only jw is used for generating PDF.
|
||||
ifeq ($(CONFIG_XMLTO),yes)
|
||||
HTML_DOCS_WITH_XMLTO = \
|
||||
$(HTML_DIR)/manual.html-chunked
|
||||
PDF_DOCS-$(CONFIG_JW) += \
|
||||
manual.pdf
|
||||
|
||||
MAN_DOCS_WITH_XMLTO = \
|
||||
$(MAN_DIR)/man1/elinks.1.in \
|
||||
$(MAN_DIR)/man5/elinkskeys.5
|
||||
endif
|
||||
MAN_DOCS += man5/elinks.conf.5
|
||||
|
||||
# Only jw is used for generating PDF.
|
||||
ifeq ($(CONFIG_JW),yes)
|
||||
PDF_DOCS_WITH_JW = \
|
||||
$(PDF_DIR)/manual.pdf
|
||||
endif
|
||||
MAN_DOCS += $(MAN_DOCS-yes)
|
||||
HTML_DOCS += $(HTML_DOCS-yes)
|
||||
PDF_DOCS += $(PDF_DOCS-yes)
|
||||
|
||||
ifeq ($(CONFIG_POD2HTML),yes)
|
||||
HTML_DOCS_WITH_POD2HTML = \
|
||||
$(HTML_DIR)/perl.html \
|
||||
$(HTML_DIR)/perl-hooks.html
|
||||
endif
|
||||
|
||||
MAN_DOCS_WITH_SHELL = $(MAN_DIR)/man5/elinks.conf.5
|
||||
|
||||
MAN_DOCS = \
|
||||
$(MAN_DOCS_WITH_SHELL)
|
||||
$(MAN_DOCS_WITH_ASCIIDOC)
|
||||
|
||||
HTML_DOCS = \
|
||||
$(HTML_DOCS_WITH_ASCIIDOC) \
|
||||
$(HTML_DOCS_WITH_POD2HTML) \
|
||||
$(HTML_DOCS_WITH_JW)
|
||||
|
||||
PDF_DOCS = \
|
||||
$(PDF_DOCS_WITH_JW)
|
||||
|
||||
html-asciidoc-yes: doc-dirs $(HTML_DOCS_WITH_ASCIIDOC)
|
||||
html-asciidoc-no:
|
||||
|
||||
html-pod2html-yes: doc-dirs $(HTML_DOCS_WITH_POD2HTML)
|
||||
html-pod2html-no:
|
||||
|
||||
html-xmlto-yes: doc-dirs $(HTML_DOCS_WITH_XMLTO)
|
||||
html-xmlto-no:
|
||||
|
||||
man-xmlto-yes: doc-dirs $(MAN_DOCS_WITH_XMLTO)
|
||||
man-xmlto-no:
|
||||
|
||||
pdf-jw-yes: doc-dirs $(PDF_DOCS_WITH_JW)
|
||||
pdf-jw-no:
|
||||
|
||||
man-docs: man-xmlto-$(CONFIG_XMLTO) $(MAN_DOCS_WITH_SHELL)
|
||||
html-docs: html-asciidoc-$(CONFIG_ASCIIDOC) html-xmlto-$(CONFIG_XMLTO) html-pod2html-$(CONFIG_POD2HTML)
|
||||
pdf-docs: pdf-jw-$(CONFIG_JW)
|
||||
man-docs: doc-dirs $(addprefix $(MAN_DIR)/,$(MAN_DOCS))
|
||||
html-docs: doc-dirs $(addprefix $(HTML_DIR)/,$(HTML_DOCS))
|
||||
pdf-docs: doc-dirs $(addprefix $(PDF_DIR)/,$(PDF_DOCS))
|
||||
|
||||
all-docs: man-docs html-docs pdf-docs
|
||||
|
||||
@ -135,36 +105,36 @@ clean-local:
|
||||
|
||||
# Autogenerated asciidoc files.
|
||||
|
||||
$(TXT_DIR)/import-features.conf.txt: $(FEATURES) doc-dirs $(IMPORT_FEATURES_CONF)
|
||||
$(TXT_DIR)/import-features.conf.txt: $(FEATURES) $(IMPORT_FEATURES_CONF)
|
||||
$(IMPORT_FEATURES_CONF) > $@
|
||||
|
||||
$(TXT_DIR)/elinks.1.%.txt: $(MAKE_ELINKS_MANPAGE) doc-dirs $(ELINKS)
|
||||
$(TXT_DIR)/elinks.1.%.txt: $(MAKE_ELINKS_MANPAGE) $(ELINKS)
|
||||
$(MAKE_ELINKS_MANPAGE) $@ $(ELINKS) $(HELP2DOC) > $@
|
||||
|
||||
$(TXT_DIR)/elinkskeys.5.%.txt: $(MAKE_ELINKSKEYS_MANPAGE) doc-dirs $(KBDBIND)
|
||||
$(TXT_DIR)/elinkskeys.5.%.txt: $(MAKE_ELINKSKEYS_MANPAGE) $(KBDBIND)
|
||||
$(MAKE_ELINKSKEYS_MANPAGE) $@ $(KBDBIND) > $@
|
||||
|
||||
|
||||
# Man Pages
|
||||
|
||||
$(XML_DIR)/%.man.xml: $(TXT_DIR)/%.man.txt doc-dirs
|
||||
$(XML_DIR)/%.man.xml: $(TXT_DIR)/%.man.txt
|
||||
$(ASCIIDOC) -b docbook -d manpage -o $@ $<
|
||||
|
||||
$(MAN_DIR)/man1/elinks.1.in: $(XML_DIR)/elinks.1.man.xml doc-dirs
|
||||
$(MAN_DIR)/man1/elinks.1.in: $(XML_DIR)/elinks.1.man.xml
|
||||
$(XMLTO) -o $(MAN_DIR)/man1 man $<
|
||||
mv $(MAN_DIR)/man1/elinks.1 $@
|
||||
|
||||
$(MAN_DIR)/man5/elinkskeys.5: $(XML_DIR)/elinkskeys.5.man.xml doc-dirs
|
||||
$(MAN_DIR)/man5/elinkskeys.5: $(XML_DIR)/elinkskeys.5.man.xml
|
||||
$(XMLTO) -o $(MAN_DIR)/man5 man $<
|
||||
sed -e 's/\\fI\\fR'\''/\\fI\\'\''\\fR/' < $@ > $@.tmp
|
||||
mv $@.tmp $@
|
||||
|
||||
$(MAN_DIR)/man5/elinks.conf.5: doc-dirs $(ELINKS)
|
||||
$(MAN_DIR)/man5/elinks.conf.5: $(ELINKS)
|
||||
$(HELP2DOC) --elinks=$(ELINKS) --elinksconf > $@
|
||||
|
||||
# XHTML/CSS Man Pages
|
||||
|
||||
$(HTML_DIR)/%.html: $(TXT_DIR)/%.html.txt doc-dirs
|
||||
$(HTML_DIR)/%.html: $(TXT_DIR)/%.html.txt
|
||||
$(ASCIIDOC) -b xhtml11 -d manpage -o $@ $<
|
||||
|
||||
# The Manual
|
||||
@ -174,28 +144,34 @@ MANUAL_EXTRA_FILES = \
|
||||
$(TXT_DIR)/elinks.1.html.txt \
|
||||
$(TXT_DIR)/elinkskeys.5.html.txt
|
||||
|
||||
$(HTML_DIR)/manual.html: $(MANUAL_FILES) doc-dirs $(MANUAL_EXTRA_FILES)
|
||||
$(HTML_DIR)/manual.html: $(MANUAL_FILES) $(MANUAL_EXTRA_FILES)
|
||||
$(ASCIIDOC) -b xhtml11 -d book -o $@ -n $<
|
||||
|
||||
$(HTML_DIR)/hacking.html: $(top_srcdir)/doc/hacking.txt doc-dirs
|
||||
$(HTML_DIR)/hacking.html: $(top_srcdir)/doc/hacking.txt
|
||||
$(ASCIIDOC) -b xhtml11 -d book -o $@ -n $<
|
||||
|
||||
$(HTML_DIR)/dev-intro.html: $(top_srcdir)/doc/dev-intro.txt doc-dirs
|
||||
$(HTML_DIR)/dev-intro.html: $(top_srcdir)/doc/dev-intro.txt
|
||||
$(ASCIIDOC) -b xhtml11 -d book -o $@ -n $<
|
||||
|
||||
$(XML_DIR)/manual.xml: $(MANUAL_FILES) doc-dirs $(MANUAL_EXTRA_FILES)
|
||||
$(XML_DIR)/manual.xml: $(MANUAL_FILES) $(MANUAL_EXTRA_FILES)
|
||||
$(ASCIIDOC) -b docbook -d book -o $@ $<
|
||||
|
||||
$(HTML_DIR)/manual.html-chunked: $(XML_DIR)/manual.xml doc-dirs
|
||||
$(HTML_DIR)/manual.html-chunked: $(XML_DIR)/manual.xml
|
||||
$(XMLTO) -o $@ html $<
|
||||
|
||||
$(PDF_DIR)/manual.pdf: $(XML_DIR)/manual.xml doc-dirs
|
||||
$(PDF_DIR)/manual.pdf: $(XML_DIR)/manual.xml
|
||||
$(JW) -o $(PDF_DIR) -b pdf $<
|
||||
|
||||
$(HTML_DIR)/perl.html: $(top_srcdir)/doc/perl.pod doc-dirs
|
||||
$(HTML_DIR)/perl.html: $(top_srcdir)/doc/perl.pod
|
||||
$(POD2HTML) --outfile=$@ < $<
|
||||
|
||||
$(HTML_DIR)/perl-hooks.html: $(top_srcdir)/contrib/perl/hooks.pl doc-dirs
|
||||
$(HTML_DIR)/perl-hooks.html: $(top_srcdir)/contrib/perl/hooks.pl
|
||||
$(POD2HTML) --outfile=$@ < $<
|
||||
|
||||
## API Docs
|
||||
#
|
||||
|
||||
$(HTML_DIR)/api/dom-sgml-parser.html: $(top_srcdir)/src/dom/sgml/parser.h
|
||||
$(CODE2DOC) $< | $(ASCIIDOC) -f code2doc.conf -b xhtml11 -d book -o $@ -n -
|
||||
|
||||
include $(top_srcdir)/Makefile.lib
|
||||
|
52
doc/code2doc.conf
Normal file
52
doc/code2doc.conf
Normal file
@ -0,0 +1,52 @@
|
||||
[specialwords]
|
||||
emphasizedwords=\bAsciiDoc\b
|
||||
monospacedwords=\basciidoc\(1\)
|
||||
|
||||
[id-inlinemacro]
|
||||
<a id="{0}" href="#{0}">{0}</a>
|
||||
|
||||
[enum-inlinemacro]
|
||||
<a id="{0}" href="#{0}">enum {0}</a>
|
||||
|
||||
[func-inlinemacro]
|
||||
<a id="{0}" href="#{0}">{0}()</a>
|
||||
|
||||
[struct-inlinemacro]
|
||||
<a id="{0}" href="#{0}">struct {0}</a>
|
||||
|
||||
[callback-inlinemacro]
|
||||
<a id="{0}" href="#{0}">callback {0}</a>
|
||||
|
||||
[ref-inlinemacro]
|
||||
<a href="{target}#{0}">{0}</a>
|
||||
|
||||
[replacements]
|
||||
(^|[^-])--($|[^-])=\1--\2
|
||||
|
||||
[tags]
|
||||
ilisttext=|
|
||||
olisttext=|
|
||||
vlisttext=|
|
||||
qlisttext=|
|
||||
colisttext=|
|
||||
|
||||
[tags]
|
||||
title1=<h1>|</h1>
|
||||
title2=<h2>|</h2>
|
||||
title3=<h3>|</h3>
|
||||
|
||||
[literalparagraph]
|
||||
<table border="1" class="code"><tr><td><pre>
|
||||
|
|
||||
</pre></td></tr></table>
|
||||
|
||||
[listingblock]
|
||||
<p><b>{title}</b></p>
|
||||
<table border="1" class="code"><tr><td><pre>
|
||||
|
|
||||
</pre></td></tr></table>
|
||||
|
||||
[noteblock]
|
||||
<div><p><b>{title}</b></p>
|
||||
|
|
||||
</div>
|
46
doc/tools/code2doc
Executable file
46
doc/tools/code2doc
Executable file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/perl -w
#
# code2doc - extract specially marked comments from a C source or header
# file and print them on stdout as AsciiDoc-style text.
#
# Usage: code2doc FILE      parse FILE and write the result to stdout
#        code2doc -v        print the copyright notice
#
# Recognized input:
#   - A comment opened with `/**' starts a documentation block.  The text on
#     the opening line becomes a title underlined with dashes, unless it
#     contains `::', in which case it is emitted as-is followed by a blank
#     line.  A `struct:[NAME]' marker on that line makes `NAME.' the prefix
#     of the ids generated for the declarations that follow.
#   - The following comment lines are printed with their leading ` * '
#     decoration removed, until the comment is closed or a line that does
#     not start with whitespace is seen.
#   - A declaration ending in `,' or `;' followed by a `/*:: text */' comment
#     is emitted as an `id:[prefix+name]::' entry with `text' as its body.
#
# Diagnostics go to stderr and cause a non-zero exit status, so that an error
# is never mistaken for generated documentation when stdout is piped into
# another tool.
use strict;
use warnings;
use diagnostics;

if (not @ARGV)
{
	print STDERR "Usage: $0 [FILE]\n\tParses [FILE], outputing the result to stdout.\n";
	exit 1;
}

my ($input) = @ARGV;

# $found:  false while outside a doc comment; the line number of the opening
#          line while inside one; 'sorta' when only the current line is to be
#          printed.
# $start:  set once anything has been printed.
# $first:  true until the first line after a `/**' opener has been handled.
# $gotone: line number of the last `/*::' entry seen, if any.
# $idpath: id prefix taken from the last `struct:[...]' title.
my ($found, $start, $first, $gotone, $idpath);

print "Copyleft© 2006, Russ Rowan (See `COPYING')\n" and exit if $input eq '-v';

# Three-argument open keeps characters such as `>' or `|' in the file name
# from being interpreted as an open mode.
my $filein;
if (not open $filein, '<', $input)
{
	print STDERR "$0: cannot open `$input': $!\n";
	exit 1;
}

$idpath = '';
while (<$filein>)
{
	if ($found)
	{
		# A line holding only the ` *' decoration: dropped when it
		# directly follows the opener, otherwise reduced to an empty line.
		if ($_ =~ /^\s+\*\s$/) { next if $first; $_ =~ s/\s\*// if not $first; }
		# End of the comment, or a line that is not indented at all.
		if ($_ =~ /^\s\*+\/$/ or $_ !~ /^\s/) { $found = undef; next; }
		# Strip the ` * ' decoration but keep any deeper indentation.
		$_ =~ s/^(\s*)\s\*\s/$1/;
		# A trailing `*/' closes the comment; this line is the last one.
		$found = 'sorta' if $_ =~ s/\s*\*\/$/\n/;
		$first = undef;
	}
	elsif ($_ =~ /^\s*\/\*\*\s(.*)/)
	{
		$_ = $1;
		$first = 1;
		print STDOUT "\n\n" if $start;
		# A comment opened and closed on the same line is printed alone.
		if ($_ =~ s/\s*\*\/$//) { $found = 'sorta'; } else { $found = $.; }
		if ($_ =~ /struct:[[]([^\]]+)[\]]/) { $idpath = "$1."; } else { $idpath = ''; }
		if ($_ =~ /::/)
		{
			$_ = "$_\n\n";
		}
		else
		{
			# Underline the title with one dash per character.
			my $dash = '-' x length($_);
			$_ = "$_\n$dash\n\n";
		}
	}
	elsif ($_ =~ /^(\s|[^\s=]+)*[\s*]([A-Za-z0-9_]+)(\s+=\s+[^,;]+)?[,;]\s*\/\*::\s*(.*)\s+\*\/$/)
	{
		print STDOUT "\n" if $gotone;
		$_ = "\nid:[$idpath$2]::\n\t$4\n";
		$found = 'sorta';
		$gotone = $.;
	}

	# Terminate a run of `/*::' entries once a later line is reached.
	print STDOUT "\n" and $gotone = undef if $gotone and $gotone < $.;
	next if not $found;
	$found = undef if $found eq 'sorta';
	print STDOUT $_ and $start = 1;
}
close $filein;
|
@ -121,52 +121,9 @@ refresh_hotkeys(struct terminal *term, struct menu *menu)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Returns true if key (upcased) matches one of the hotkeys in menu */
|
||||
static int
|
||||
is_hotkey(struct menu_item *item, unsigned char key, struct terminal *term)
|
||||
{
|
||||
unsigned char *text;
|
||||
int key_pos;
|
||||
|
||||
assert(item);
|
||||
if_assert_failed return 0;
|
||||
|
||||
if (!mi_has_left_text(item)) return 0;
|
||||
|
||||
text = item->text;
|
||||
if (mi_text_translate(item)) text = _(text, term);
|
||||
if (!text || !*text) return 0;
|
||||
|
||||
key_pos = item->hotkey_pos;
|
||||
|
||||
#ifdef CONFIG_DEBUG
|
||||
if (key_pos < 0) key_pos = -key_pos;
|
||||
#endif
|
||||
|
||||
return (key_pos && (toupper(text[key_pos]) == key));
|
||||
}
|
||||
|
||||
/* Returns true if key (upcased) matches first letter of menu item left text. */
|
||||
static int
|
||||
is_not_so_hotkey(struct menu_item *item, unsigned char key, struct terminal *term)
|
||||
{
|
||||
unsigned char *text;
|
||||
|
||||
assert(item);
|
||||
if_assert_failed return 0;
|
||||
|
||||
if (!mi_has_left_text(item)) return 0;
|
||||
|
||||
text = item->text;
|
||||
if (mi_text_translate(item)) text = _(text, term);
|
||||
if (!text || !*text) return 0;
|
||||
|
||||
return (toupper(*text) == key);
|
||||
}
|
||||
|
||||
static int
|
||||
check_hotkeys_common(struct menu *menu, unsigned char hotkey, struct terminal *term,
|
||||
int (*func)(struct menu_item *, unsigned char, struct terminal *))
|
||||
int check_mode)
|
||||
{
|
||||
unsigned char key = toupper(hotkey);
|
||||
int i = menu->selected;
|
||||
@ -179,9 +136,37 @@ check_hotkeys_common(struct menu *menu, unsigned char hotkey, struct terminal *t
|
||||
|
||||
start = i;
|
||||
do {
|
||||
struct menu_item *item;
|
||||
unsigned char *text;
|
||||
int found;
|
||||
|
||||
if (++i == menu->size) i = 0;
|
||||
|
||||
if (func(&menu->items[i], key, term)) {
|
||||
item = &menu->items[i];
|
||||
|
||||
if (!mi_has_left_text(item)) continue;
|
||||
|
||||
text = item->text;
|
||||
if (mi_text_translate(item)) text = _(text, term);
|
||||
if (!text || !*text) continue;
|
||||
|
||||
if (check_mode == 0) {
|
||||
/* Does the key (upcased) match one of the
|
||||
* hotkeys in menu ? */
|
||||
int key_pos = item->hotkey_pos;
|
||||
|
||||
#ifdef CONFIG_DEBUG
|
||||
if (key_pos < 0) key_pos = -key_pos;
|
||||
#endif
|
||||
found = (key_pos && (toupper(text[key_pos]) == key));
|
||||
|
||||
} else {
|
||||
/* Does the key (upcased) match first letter
|
||||
* of menu item left text ? */
|
||||
found = (toupper(*text) == key);
|
||||
}
|
||||
|
||||
if (found) {
|
||||
menu->selected = i;
|
||||
return 1;
|
||||
}
|
||||
@ -195,7 +180,7 @@ check_hotkeys_common(struct menu *menu, unsigned char hotkey, struct terminal *t
|
||||
int
|
||||
check_hotkeys(struct menu *menu, unsigned char key, struct terminal *term)
|
||||
{
|
||||
return check_hotkeys_common(menu, key, term, is_hotkey);
|
||||
return check_hotkeys_common(menu, key, term, 0);
|
||||
}
|
||||
|
||||
/* Search if first letter of an entry in menu matches the key (caseless comp.).
|
||||
@ -205,5 +190,5 @@ check_hotkeys(struct menu *menu, unsigned char key, struct terminal *term)
|
||||
int
|
||||
check_not_so_hot_keys(struct menu *menu, unsigned char key, struct terminal *term)
|
||||
{
|
||||
return check_hotkeys_common(menu, key, term, is_not_so_hotkey);
|
||||
return check_hotkeys_common(menu, key, term, 1);
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "document/renderer.h"
|
||||
#include "dom/scanner.h"
|
||||
#include "dom/sgml/parser.h"
|
||||
#include "dom/sgml/rss/rss.h"
|
||||
#include "dom/node.h"
|
||||
#include "dom/stack.h"
|
||||
#include "intl/charsets.h"
|
||||
@ -54,6 +55,13 @@ struct dom_renderer {
|
||||
unsigned int find_url:1;
|
||||
#endif
|
||||
struct screen_char styles[DOM_NODES];
|
||||
|
||||
/* RSS renderer variables */
|
||||
struct dom_node *channel;
|
||||
struct dom_node_list *items;
|
||||
struct dom_node *item;
|
||||
struct dom_node *node;
|
||||
struct dom_string text;
|
||||
};
|
||||
|
||||
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
|
||||
@ -334,7 +342,8 @@ render_dom_text(struct dom_renderer *renderer, struct screen_char *template,
|
||||
ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
|
||||
|
||||
static inline struct link *
|
||||
add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
|
||||
add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length,
|
||||
unsigned char *uristring, int urilength)
|
||||
{
|
||||
struct document *document = renderer->document;
|
||||
int x = renderer->canvas_x;
|
||||
@ -343,7 +352,6 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
|
||||
struct link *link;
|
||||
struct point *point;
|
||||
struct screen_char template;
|
||||
unsigned char *uristring;
|
||||
color_T fgcolor;
|
||||
|
||||
if (!realloc_document_links(document, document->nlinks + 1))
|
||||
@ -355,7 +363,7 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
|
||||
return NULL;
|
||||
|
||||
uristring = convert_string(renderer->convert_table,
|
||||
string, length, document->options.cp,
|
||||
uristring, urilength, document->options.cp,
|
||||
CSM_DEFAULT, NULL, NULL, NULL);
|
||||
if (!uristring) return NULL;
|
||||
|
||||
@ -479,7 +487,7 @@ render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *no
|
||||
string += offset;
|
||||
length -= offset;
|
||||
|
||||
add_dom_link(renderer, string, matchlen);
|
||||
add_dom_link(renderer, string, matchlen, string, matchlen);
|
||||
|
||||
length -= matchlen;
|
||||
string += matchlen;
|
||||
@ -601,7 +609,8 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
|
||||
break;
|
||||
}
|
||||
|
||||
add_dom_link(renderer, value, valuelen - skips);
|
||||
add_dom_link(renderer, value, valuelen - skips,
|
||||
value, valuelen - skips);
|
||||
|
||||
if (skips > 0) {
|
||||
value += valuelen - skips;
|
||||
@ -682,6 +691,272 @@ static struct dom_stack_context_info dom_source_renderer_context_info = {
|
||||
};
|
||||
|
||||
|
||||
/* DOM RSS Renderer */
|
||||
|
||||
static void
|
||||
dom_rss_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->current->data;
|
||||
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
switch (node->data.element.type) {
|
||||
case RSS_ELEMENT_CHANNEL:
|
||||
/* The stack should have: #document * channel */
|
||||
if (stack->depth != 3)
|
||||
break;
|
||||
|
||||
if (!renderer->channel) {
|
||||
renderer->channel = node;
|
||||
}
|
||||
break;
|
||||
|
||||
case RSS_ELEMENT_ITEM:
|
||||
/* The stack should have: #document * channel item */
|
||||
#if 0
|
||||
/* Don't be so strict ... */
|
||||
if (stack->depth != 4)
|
||||
break;
|
||||
#endif
|
||||
/* ... but be exclusive. */
|
||||
if (renderer->item)
|
||||
break;
|
||||
add_to_dom_node_list(&renderer->items, node, -1);
|
||||
renderer->item = node;
|
||||
break;
|
||||
|
||||
case RSS_ELEMENT_LINK:
|
||||
case RSS_ELEMENT_DESCRIPTION:
|
||||
case RSS_ELEMENT_TITLE:
|
||||
case RSS_ELEMENT_AUTHOR:
|
||||
case RSS_ELEMENT_PUBDATE:
|
||||
if (!node->parent || renderer->node != node->parent)
|
||||
break;
|
||||
|
||||
renderer->node = node;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dom_rss_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->current->data;
|
||||
struct dom_node_list **list;
|
||||
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
switch (node->data.element.type) {
|
||||
case RSS_ELEMENT_ITEM:
|
||||
if (is_dom_string_set(&renderer->text))
|
||||
done_dom_string(&renderer->text);
|
||||
renderer->item = NULL;
|
||||
break;
|
||||
|
||||
case RSS_ELEMENT_LINK:
|
||||
case RSS_ELEMENT_DESCRIPTION:
|
||||
case RSS_ELEMENT_TITLE:
|
||||
case RSS_ELEMENT_AUTHOR:
|
||||
case RSS_ELEMENT_PUBDATE:
|
||||
if (!is_dom_string_set(&renderer->text)
|
||||
|| !node->parent
|
||||
|| renderer->item != node->parent
|
||||
|| renderer->node != node)
|
||||
break;
|
||||
|
||||
/* Replace any child nodes with the normalized text node. */
|
||||
list = get_dom_node_list(node->parent, node);
|
||||
done_dom_node_list(*list);
|
||||
if (is_dom_string_set(&renderer->text)) {
|
||||
if (!add_dom_node(node, DOM_NODE_TEXT, &renderer->text))
|
||||
done_dom_string(&renderer->text);
|
||||
}
|
||||
renderer->node = NULL;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dom_rss_push_content(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->current->data;
|
||||
unsigned char *string = node->string.string;
|
||||
int length = node->string.length;
|
||||
|
||||
assert(node && renderer && renderer->document);
|
||||
|
||||
if (!renderer->node)
|
||||
return;
|
||||
|
||||
if (node->type == DOM_NODE_ENTITY_REFERENCE) {
|
||||
string -= 1;
|
||||
length += 2;
|
||||
}
|
||||
|
||||
if (!is_dom_string_set(&renderer->text)) {
|
||||
init_dom_string(&renderer->text, string, length);
|
||||
} else {
|
||||
add_to_dom_string(&renderer->text, string, length);
|
||||
}
|
||||
}
|
||||
|
||||
static struct dom_string *
|
||||
get_rss_node_text(struct dom_node *node)
|
||||
{
|
||||
struct dom_node *child;
|
||||
int index;
|
||||
|
||||
if (!node->data.element.children)
|
||||
return NULL;
|
||||
|
||||
foreach_dom_node (node->data.element.children, child, index) {
|
||||
if (child->type == DOM_NODE_TEXT)
|
||||
return &child->string;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct dom_node *
|
||||
get_rss_child(struct dom_node *parent, enum rss_element_type type)
|
||||
{
|
||||
struct dom_node *node;
|
||||
int index;
|
||||
|
||||
if (!parent->data.element.children)
|
||||
return NULL;
|
||||
|
||||
foreach_dom_node (parent->data.element.children, node, index) {
|
||||
if (node->type == DOM_NODE_ELEMENT
|
||||
&& type == node->data.element.type)
|
||||
return node;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static struct dom_string *
|
||||
get_rss_text(struct dom_node *node, enum rss_element_type type)
|
||||
{
|
||||
node = get_rss_child(node, type);
|
||||
|
||||
return node ? get_rss_node_text(node) : NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
render_rss_item(struct dom_renderer *renderer, struct dom_node *item)
|
||||
{
|
||||
struct dom_string *title = get_rss_text(item, RSS_ELEMENT_TITLE);
|
||||
struct dom_string *link = get_rss_text(item, RSS_ELEMENT_LINK);
|
||||
struct dom_string *author = get_rss_text(item, RSS_ELEMENT_AUTHOR);
|
||||
struct dom_string *date = get_rss_text(item, RSS_ELEMENT_PUBDATE);
|
||||
|
||||
if (title && is_dom_string_set(title)) {
|
||||
render_dom_text(renderer, &renderer->styles[DOM_NODE_ELEMENT],
|
||||
title->string, title->length);
|
||||
}
|
||||
|
||||
if (link && is_dom_string_set(link)) {
|
||||
X(renderer)++;
|
||||
add_dom_link(renderer, "[link]", 6, link->string, link->length);
|
||||
}
|
||||
|
||||
/* New line, and indent */
|
||||
Y(renderer)++;
|
||||
X(renderer) = 0;
|
||||
|
||||
if (author && is_dom_string_set(author)) {
|
||||
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
|
||||
author->string, author->length);
|
||||
}
|
||||
|
||||
if (date && is_dom_string_set(date)) {
|
||||
if (author && is_dom_string_set(author)) {
|
||||
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
|
||||
" - ", 3);
|
||||
}
|
||||
|
||||
render_dom_text(renderer, &renderer->styles[DOM_NODE_COMMENT],
|
||||
date->string, date->length);
|
||||
}
|
||||
|
||||
if ((author && is_dom_string_set(author))
|
||||
|| (date && is_dom_string_set(date))) {
|
||||
/* New line, and indent */
|
||||
Y(renderer)++;
|
||||
X(renderer) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dom_rss_pop_document(struct dom_stack *stack, struct dom_node *root, void *data)
|
||||
{
|
||||
struct dom_renderer *renderer = stack->current->data;
|
||||
|
||||
if (!renderer->channel)
|
||||
return;
|
||||
|
||||
render_rss_item(renderer, renderer->channel);
|
||||
|
||||
if (renderer->items) {
|
||||
struct dom_node *node;
|
||||
int index;
|
||||
|
||||
foreach_dom_node (renderer->items, node, index) {
|
||||
Y(renderer)++;
|
||||
X(renderer) = 0;
|
||||
render_rss_item(renderer, node);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_dom_string_set(&renderer->text))
|
||||
done_dom_string(&renderer->text);
|
||||
mem_free_if(renderer->items);
|
||||
|
||||
done_dom_node(root);
|
||||
}
|
||||
|
||||
|
||||
static struct dom_stack_context_info dom_rss_renderer_context_info = {
|
||||
/* Object size: */ 0,
|
||||
/* Push: */
|
||||
{
|
||||
/* */ NULL,
|
||||
/* DOM_NODE_ELEMENT */ dom_rss_push_element,
|
||||
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||
/* DOM_NODE_TEXT */ dom_rss_push_content,
|
||||
/* DOM_NODE_CDATA_SECTION */ dom_rss_push_content,
|
||||
/* DOM_NODE_ENTITY_REFERENCE */ dom_rss_push_content,
|
||||
/* DOM_NODE_ENTITY */ NULL,
|
||||
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||
/* DOM_NODE_COMMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||
/* DOM_NODE_NOTATION */ NULL,
|
||||
},
|
||||
/* Pop: */
|
||||
{
|
||||
/* */ NULL,
|
||||
/* DOM_NODE_ELEMENT */ dom_rss_pop_element,
|
||||
/* DOM_NODE_ATTRIBUTE */ NULL,
|
||||
/* DOM_NODE_TEXT */ NULL,
|
||||
/* DOM_NODE_CDATA_SECTION */ NULL,
|
||||
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
|
||||
/* DOM_NODE_ENTITY */ NULL,
|
||||
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
|
||||
/* DOM_NODE_COMMENT */ NULL,
|
||||
/* DOM_NODE_DOCUMENT */ dom_rss_pop_document,
|
||||
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
|
||||
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
|
||||
/* DOM_NODE_NOTATION */ NULL,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Shared multiplexor between renderers */
|
||||
void
|
||||
render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
@ -692,14 +967,13 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
struct conv_table *convert_table;
|
||||
struct sgml_parser *parser;
|
||||
enum sgml_document_type doctype;
|
||||
enum sgml_parser_type parser_type;
|
||||
unsigned char *string = struri(cached->uri);
|
||||
size_t length = strlen(string);
|
||||
struct dom_string uri = INIT_DOM_STRING(string, length);
|
||||
struct dom_string source = INIT_DOM_STRING(buffer->source, buffer->length);
|
||||
enum sgml_parser_code code;
|
||||
|
||||
assert(document->options.plain);
|
||||
|
||||
convert_table = get_convert_table(head, document->options.cp,
|
||||
document->options.assume_cp,
|
||||
&document->cp,
|
||||
@ -710,6 +984,11 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
|
||||
document->bgcolor = document->options.default_bg;
|
||||
|
||||
if (document->options.plain)
|
||||
parser_type = SGML_PARSER_STREAM;
|
||||
else
|
||||
parser_type = SGML_PARSER_TREE;
|
||||
|
||||
/* FIXME: Refactor the doctype lookup. */
|
||||
if (!strcasecmp("application/rss+xml", cached->content_type)) {
|
||||
doctype = SGML_DOCTYPE_RSS;
|
||||
@ -730,11 +1009,17 @@ render_dom_document(struct cache_entry *cached, struct document *document,
|
||||
doctype = SGML_DOCTYPE_HTML;
|
||||
}
|
||||
|
||||
parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri, 0);
|
||||
if (!parser) return;
|
||||
parser = init_sgml_parser(parser_type, doctype, &uri, 0);
|
||||
if (!parser) return;
|
||||
|
||||
if (document->options.plain) {
|
||||
add_dom_stack_context(&parser->stack, &renderer,
|
||||
&dom_source_renderer_context_info);
|
||||
|
||||
add_dom_stack_context(&parser->stack, &renderer,
|
||||
&dom_source_renderer_context_info);
|
||||
} else if (doctype == SGML_DOCTYPE_RSS) {
|
||||
add_dom_stack_context(&parser->stack, &renderer,
|
||||
&dom_rss_renderer_context_info);
|
||||
}
|
||||
|
||||
/* FIXME: When rendering this way we don't really care about the code.
|
||||
* However, it will be useful when we will be able to also
|
||||
|
@ -254,7 +254,13 @@ render_encoded_document(struct cache_entry *cached, struct document *document)
|
||||
render_plain_document(cached, document, &buffer);
|
||||
|
||||
} else {
|
||||
render_html_document(cached, document, &buffer);
|
||||
#ifdef CONFIG_DOM
|
||||
if (cached->content_type
|
||||
&& (!strlcasecmp("application/rss+xml", 19, cached->content_type, -1)))
|
||||
render_dom_document(cached, document, &buffer);
|
||||
else
|
||||
#endif
|
||||
render_html_document(cached, document, &buffer);
|
||||
}
|
||||
|
||||
if (encoding != ENCODING_NONE) {
|
||||
|
@ -155,7 +155,7 @@ init_dom_scanner_info(struct dom_scanner_info *scanner_info)
|
||||
void
|
||||
init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
|
||||
struct dom_string *string, int state, int count_lines, int complete,
|
||||
int check_complete)
|
||||
int check_complete, int detect_errors)
|
||||
{
|
||||
if (!scanner_info->initialized) {
|
||||
init_dom_scanner_info(scanner_info);
|
||||
@ -173,6 +173,7 @@ init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_i
|
||||
scanner->count_lines = !!count_lines;
|
||||
scanner->incomplete = !complete;
|
||||
scanner->check_complete = !!check_complete;
|
||||
scanner->detect_errors = !!detect_errors;
|
||||
scanner->lineno = scanner->count_lines;
|
||||
scanner->info->scan(scanner);
|
||||
}
|
||||
|
@ -20,6 +20,9 @@ struct dom_scanner_token {
|
||||
/* Some precedence value */
|
||||
int precedence;
|
||||
|
||||
/* The line number; used for error tokens */
|
||||
unsigned int lineno;
|
||||
|
||||
/* The start of the token string and the token length */
|
||||
struct dom_string string;
|
||||
};
|
||||
@ -93,7 +96,7 @@ struct dom_scanner_info {
|
||||
/* Initializes the scanner. */
|
||||
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
|
||||
struct dom_string *string, int state, int count_lines, int complete,
|
||||
int check_complete);
|
||||
int check_complete, int detect_error);
|
||||
|
||||
/* The number of tokens in the scanners token table:
|
||||
* At best it should be big enough to contain properties with space separated
|
||||
@ -130,6 +133,9 @@ struct dom_scanner {
|
||||
unsigned int check_complete:1; /* Only generate complete tokens */
|
||||
unsigned int incomplete:1; /* The scanned string is incomplete */
|
||||
|
||||
unsigned int detect_errors:1; /* Check for markup errors */
|
||||
unsigned int found_error; /* Did we already report this error? */
|
||||
|
||||
unsigned int count_lines:1; /* Is line counting enbaled? */
|
||||
unsigned int lineno; /* Line # of the last scanned token */
|
||||
|
||||
|
@ -391,7 +391,7 @@ parse_dom_select(struct dom_select *select, struct dom_stack *stack,
|
||||
struct dom_scanner scanner;
|
||||
struct dom_select_node sel;
|
||||
|
||||
init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0);
|
||||
init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0, 0);
|
||||
|
||||
memset(&sel, 0, sizeof(sel));
|
||||
|
||||
|
@ -154,6 +154,17 @@ add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scann
|
||||
|
||||
/* SGML parser main handling: */
|
||||
|
||||
static enum sgml_parser_code
|
||||
call_sgml_error_function(struct dom_stack *stack, struct dom_scanner_token *token)
|
||||
{
|
||||
struct sgml_parser *parser = get_sgml_parser(stack);
|
||||
unsigned int line = get_sgml_parser_line_number(parser);
|
||||
|
||||
assert(parser->error_func);
|
||||
|
||||
return parser->error_func(parser, &token->string, line);
|
||||
}
|
||||
|
||||
static inline enum sgml_parser_code
|
||||
parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
{
|
||||
@ -217,6 +228,17 @@ parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
case SGML_TOKEN_INCOMPLETE:
|
||||
return SGML_PARSER_CODE_INCOMPLETE;
|
||||
|
||||
case SGML_TOKEN_ERROR:
|
||||
{
|
||||
enum sgml_parser_code code;
|
||||
|
||||
code = call_sgml_error_function(stack, token);
|
||||
if (code != SGML_PARSER_CODE_OK)
|
||||
return code;
|
||||
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
skip_dom_scanner_token(scanner);
|
||||
}
|
||||
@ -314,8 +336,13 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
if (!token || token->type == SGML_TOKEN_INCOMPLETE)
|
||||
return SGML_PARSER_CODE_INCOMPLETE;
|
||||
|
||||
assert(token->type == SGML_TOKEN_PROCESS_DATA);
|
||||
if (token->type == SGML_TOKEN_ERROR)
|
||||
break;
|
||||
|
||||
assert(token->type == SGML_TOKEN_PROCESS_DATA);
|
||||
/* Fall-through */
|
||||
|
||||
case SGML_TOKEN_PROCESS_DATA:
|
||||
if (add_sgml_proc_instruction(stack, &target, token)
|
||||
&& (target.type == SGML_TOKEN_PROCESS_XML
|
||||
|| target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
|
||||
@ -326,7 +353,7 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
/* The attribute source is complete. */
|
||||
init_dom_scanner(&attr_scanner, &sgml_scanner_info,
|
||||
&token->string, SGML_STATE_ELEMENT,
|
||||
scanner->count_lines, 1, 0);
|
||||
scanner->count_lines, 1, 0, 0);
|
||||
|
||||
if (dom_scanner_has_tokens(&attr_scanner)) {
|
||||
/* Ignore parser codes from this
|
||||
@ -350,6 +377,17 @@ parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
|
||||
case SGML_TOKEN_INCOMPLETE:
|
||||
return SGML_PARSER_CODE_INCOMPLETE;
|
||||
|
||||
case SGML_TOKEN_ERROR:
|
||||
{
|
||||
enum sgml_parser_code code;
|
||||
|
||||
code = call_sgml_error_function(stack, token);
|
||||
if (code != SGML_PARSER_CODE_OK)
|
||||
return code;
|
||||
|
||||
skip_dom_scanner_token(scanner);
|
||||
break;
|
||||
}
|
||||
case SGML_TOKEN_SPACE:
|
||||
case SGML_TOKEN_TEXT:
|
||||
default:
|
||||
@ -403,11 +441,13 @@ sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
|
||||
int count_lines = !!(parser->flags & SGML_PARSER_COUNT_LINES);
|
||||
int complete = !!(parser->flags & SGML_PARSER_COMPLETE);
|
||||
int incremental = !!(parser->flags & SGML_PARSER_INCREMENTAL);
|
||||
int detect_errors = !!(parser->flags & SGML_PARSER_DETECT_ERRORS);
|
||||
|
||||
parsing->depth = parser->stack.depth;
|
||||
get_dom_stack_top(&parser->stack)->immutable = 1;
|
||||
init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
|
||||
SGML_STATE_TEXT, count_lines, complete, incremental);
|
||||
SGML_STATE_TEXT, count_lines, complete, incremental,
|
||||
detect_errors);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -494,6 +534,11 @@ get_sgml_parser_line_number(struct sgml_parser *parser)
|
||||
|
||||
assert(pstate->scanner.count_lines && pstate->scanner.lineno);
|
||||
|
||||
if (pstate->scanner.current
|
||||
&& pstate->scanner.current < pstate->scanner.table + DOM_SCANNER_TOKENS
|
||||
&& pstate->scanner.current->type == SGML_TOKEN_ERROR)
|
||||
return pstate->scanner.current->lineno;
|
||||
|
||||
return pstate->scanner.lineno;
|
||||
}
|
||||
|
||||
@ -553,6 +598,9 @@ init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (flags & SGML_PARSER_DETECT_ERRORS)
|
||||
flags |= SGML_PARSER_COUNT_LINES;
|
||||
|
||||
parser->type = type;
|
||||
parser->flags = flags;
|
||||
parser->info = get_sgml_info(doctype);
|
||||
|
@ -7,69 +7,150 @@
|
||||
#include "dom/sgml/sgml.h"
|
||||
#include "dom/scanner.h"
|
||||
|
||||
struct sgml_parser;
|
||||
struct string;
|
||||
struct uri;
|
||||
|
||||
/** enum:[sgml_parser_type]: SGML parser type
|
||||
*
|
||||
* There are two kinds of parser types: One that optimises one-time access to
|
||||
* the DOM tree and one that creates a persistent DOM tree. */
|
||||
enum sgml_parser_type {
|
||||
/* The first one is a DOM tree builder. */
|
||||
SGML_PARSER_TREE,
|
||||
/* The second one will simply push nodes on the stack, not building a
|
||||
/** id:[SGML_PARSER_STREAM]::
|
||||
* The first one will simply push nodes on the stack, not building a
|
||||
* DOM tree. This interface is similar to that of SAX (Simple API for
|
||||
* XML) where events are fired when nodes are entered and exited. It is
|
||||
* useful when you are not actually interested in the DOM tree, but can
|
||||
* do all processing in a stream-like manner, such as when highlighting
|
||||
* HTML code. */
|
||||
SGML_PARSER_STREAM,
|
||||
/** id:[SGML_PARSER_TREE]::
|
||||
* The second one is a DOM tree builder, that builds a persistent DOM
|
||||
* tree. When using this type, it is possible to do even more
|
||||
* (pre)processing than for parser streams. For example you can sort
|
||||
* element child nodes, or purge various nodes such as text nodes that
|
||||
* only contain space characters. */
|
||||
SGML_PARSER_TREE,
|
||||
};
|
||||
|
||||
/** enum:[sgml_parser_flag]: SGML parser flags
|
||||
*
|
||||
* These flags control how the parser behaves.
|
||||
*/
|
||||
enum sgml_parser_flag {
|
||||
SGML_PARSER_COUNT_LINES = 1,
|
||||
SGML_PARSER_COMPLETE = 2,
|
||||
SGML_PARSER_INCREMENTAL = 4,
|
||||
SGML_PARSER_COUNT_LINES = 1, /*:: Make line numbers available. */
|
||||
SGML_PARSER_COMPLETE = 2, /*:: Used internally when incremental. */
|
||||
SGML_PARSER_INCREMENTAL = 4, /*:: Parse chunks of input. */
|
||||
SGML_PARSER_DETECT_ERRORS = 8, /*:: Report errors. */
|
||||
};
|
||||
|
||||
/** struct:[sgml_parser_state]: SGML parser state
|
||||
*
|
||||
* The SGML parser has only little state.
|
||||
*/
|
||||
struct sgml_parser_state {
|
||||
/* Info about the properties of the node contained by state.
|
||||
/** id:[sgml_parser_state.info]::
|
||||
* Info about the properties of the node contained by state.
|
||||
* This is only meaningful to element and attribute nodes. For
|
||||
* unknown nodes it points to the common 'unknown node' info. */
|
||||
struct sgml_node_info *info;
|
||||
/* This is used by the DOM source renderer for highlighting the
|
||||
/** id:[sgml_parser_state.end_token]::
|
||||
* This is used by the DOM source renderer for highlighting the
|
||||
* end-tag of an element. */
|
||||
struct dom_scanner_token end_token;
|
||||
};
|
||||
|
||||
struct sgml_parser {
|
||||
enum sgml_parser_type type; /* Stream or tree */
|
||||
enum sgml_parser_flag flags; /* Flags that control the behaviour */
|
||||
|
||||
struct sgml_info *info; /* Backend dependent info */
|
||||
|
||||
struct dom_string uri; /* The URI of the DOM document */
|
||||
struct dom_node *root; /* The document root node */
|
||||
|
||||
struct dom_stack stack; /* A stack for tracking parsed nodes */
|
||||
struct dom_stack parsing; /* Used for tracking parsing states */
|
||||
};
|
||||
|
||||
struct sgml_parser *
|
||||
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
||||
struct dom_string *uri, enum sgml_parser_flag flags);
|
||||
|
||||
void done_sgml_parser(struct sgml_parser *parser);
|
||||
|
||||
/** enum:[sgml_parser_code]: (Error) codes for the SGML parser
|
||||
*
|
||||
* These enum values are used for return codes.
|
||||
*/
|
||||
enum sgml_parser_code {
|
||||
SGML_PARSER_CODE_OK, /* The parsing was successful */
|
||||
SGML_PARSER_CODE_INCOMPLETE, /* The parsing could not be completed */
|
||||
SGML_PARSER_CODE_MEM_ALLOC, /* Failed to allocate memory */
|
||||
SGML_PARSER_CODE_OK, /*:: The parsing was successful */
|
||||
SGML_PARSER_CODE_INCOMPLETE, /*:: The parsing could not be completed */
|
||||
SGML_PARSER_CODE_MEM_ALLOC, /*:: Failed to allocate memory */
|
||||
|
||||
/* FIXME: For when we will add support for requiring stricter parsing
|
||||
/** id:[SGML_PARSER_CODE_ERROR]::
|
||||
* FIXME: For when we will add support for requiring stricter parsing
|
||||
* or even a validator. */
|
||||
SGML_PARSER_CODE_ERROR,
|
||||
};
|
||||
|
||||
/** callback:[sgml_error_T]: SGML error callback
|
||||
*
|
||||
* Called by the SGML parser when a parsing error has occurred.
|
||||
*
|
||||
* If the return code is not ref:[SGML_PARSER_CODE_OK] the parsing will be
|
||||
* ended and that code will be returned. */
|
||||
typedef enum sgml_parser_code
|
||||
(*sgml_error_T)(struct sgml_parser *, struct dom_string *, unsigned int);
|
||||
|
||||
|
||||
/** struct:[sgml_parser]: The SGML parser
|
||||
*
|
||||
* This struct holds info used while parsing SGML data.
|
||||
*
|
||||
* NOTE: The only variable the user should set is ref:[error_func]. */
|
||||
struct sgml_parser {
|
||||
enum sgml_parser_type type; /*:: Stream or tree */
|
||||
enum sgml_parser_flag flags; /*:: Flags that control the behaviour */
|
||||
|
||||
struct sgml_info *info; /*:: Backend dependent info */
|
||||
|
||||
struct dom_string uri; /*:: The URI of the DOM document */
|
||||
struct dom_node *root; /*:: The document root node */
|
||||
|
||||
sgml_error_T error_func; /*:: Called for detected errors */
|
||||
|
||||
struct dom_stack stack; /*:: A stack for tracking parsed nodes */
|
||||
struct dom_stack parsing; /*:: Used for tracking parsing states */
|
||||
};
|
||||
|
||||
|
||||
/** func:[init_sgml_parser]: Initialise an SGML parser
|
||||
*
|
||||
* Initialise an SGML parser with the given properties.
|
||||
*
|
||||
* type:: Stream or tree; one-time or persistent.
|
||||
* doctype:: The document type, this affects what sub type nodes are given.
|
||||
* uri:: The URI of the document root.
|
||||
* flags:: Flags controlling the behaviour of the parser.
|
||||
*
|
||||
* Returns the created parser or NULL.
|
||||
*/
|
||||
struct sgml_parser *
|
||||
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
|
||||
struct dom_string *uri, enum sgml_parser_flag flags);
|
||||
|
||||
/** func:[done_sgml_parser]: Release an SGML parser
|
||||
*
|
||||
* Deallocates all resources, _except_ the root node.
|
||||
*
|
||||
* parser:: The parser being released.
|
||||
*/
|
||||
void done_sgml_parser(struct sgml_parser *parser);
|
||||
|
||||
/** func:[parse_sgml]: Parse a chunk of SGML source
|
||||
*
|
||||
* Parses the given `buffer`. For incremental rendering the last buffer can be
|
||||
* signalled through the `complete` parameter.
|
||||
*
|
||||
* parser:: A parser created with ref:[init_sgml_parser].
|
||||
* buffer:: A string containing the chunk to parse.
|
||||
* complete:: Whether this is the last chunk to parse.
|
||||
*
|
||||
* The returned code is ref:[SGML_PARSER_CODE_OK] if the buffer was
|
||||
* successfully parsed, else a code hinting at the error.
|
||||
*/
|
||||
enum sgml_parser_code
|
||||
parse_sgml(struct sgml_parser *parser, struct dom_string *buffer, int complete);
|
||||
|
||||
/** func:[get_sgml_parser_line_number]: Get the line position in the source
|
||||
*
|
||||
* Returns what line number the parser is currently at or zero if there has
|
||||
* been no parsing yet.
|
||||
*
|
||||
* NOTE: Line numbers are recorded in the scanner tokens.
|
||||
*/
|
||||
unsigned int get_sgml_parser_line_number(struct sgml_parser *parser);
|
||||
|
||||
#endif
|
||||
|
@ -117,6 +117,86 @@ set_sgml_incomplete(struct dom_scanner *scanner, struct dom_scanner_token *token
|
||||
scanner->position = scanner->end;
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
check_sgml_error(struct dom_scanner *scanner)
|
||||
{
|
||||
unsigned int found_error = scanner->found_error;
|
||||
|
||||
/* Toggle if we found an error previously. */
|
||||
scanner->found_error = 0;
|
||||
|
||||
return scanner->detect_errors && !found_error;
|
||||
}
|
||||
|
||||
static unsigned char *
|
||||
get_sgml_error_end(struct dom_scanner *scanner, enum sgml_token_type type,
|
||||
unsigned char *end)
|
||||
{
|
||||
switch (type) {
|
||||
case SGML_TOKEN_CDATA_SECTION:
|
||||
case SGML_TOKEN_NOTATION_ATTLIST:
|
||||
case SGML_TOKEN_NOTATION_DOCTYPE:
|
||||
case SGML_TOKEN_NOTATION_ELEMENT:
|
||||
if (scanner->position + 9 < end)
|
||||
end = scanner->position + 9;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_COMMENT:
|
||||
/* Just include the '<!--' part. */
|
||||
if (scanner->position + 4 < end)
|
||||
end = scanner->position + 4;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_NOTATION_ENTITY:
|
||||
if (scanner->position + 6 < end)
|
||||
end = scanner->position + 6;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_PROCESS_XML:
|
||||
if (scanner->position + 5 < end)
|
||||
end = scanner->position + 5;
|
||||
break;
|
||||
|
||||
case SGML_TOKEN_PROCESS_XML_STYLESHEET:
|
||||
if (scanner->position + 16 < end)
|
||||
end = scanner->position + 16;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return end;
|
||||
}
|
||||
|
||||
|
||||
static struct dom_scanner_token *
|
||||
set_sgml_error(struct dom_scanner *scanner, unsigned char *end)
|
||||
{
|
||||
struct dom_scanner_token *token = scanner->current;
|
||||
struct dom_scanner_token *next;
|
||||
|
||||
assert(!scanner->found_error);
|
||||
|
||||
if (scanner->current >= scanner->table + DOM_SCANNER_TOKENS) {
|
||||
scanner->found_error = 1;
|
||||
next = NULL;
|
||||
|
||||
} else {
|
||||
scanner->current++;
|
||||
next = scanner->current;
|
||||
copy_struct(next, token);
|
||||
}
|
||||
|
||||
token->type = SGML_TOKEN_ERROR;
|
||||
token->lineno = scanner->lineno;
|
||||
set_dom_string(&token->string, scanner->position, end - scanner->position);
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
|
||||
/* Text token scanning */
|
||||
|
||||
/* I think it is faster to not check the table here --jonas */
|
||||
@ -155,9 +235,17 @@ scan_sgml_text_token(struct dom_scanner *scanner, struct dom_scanner_token *toke
|
||||
}
|
||||
|
||||
/* We want the biggest possible text token. */
|
||||
if (check_sgml_incomplete(scanner, string) && !complete) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
if (!complete) {
|
||||
if (check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner)) {
|
||||
token = set_sgml_error(scanner, string);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -412,7 +500,7 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
|
||||
assert(real_length >= 0);
|
||||
|
||||
} else {
|
||||
skip_sgml_space(scanner, &string);
|
||||
scan_sgml(scanner, string, SGML_CHAR_IDENT);
|
||||
type = map_dom_scanner_string(scanner, ident, string, base);
|
||||
if (skip_sgml(scanner, &string, '>', 0)) {
|
||||
/* We found the end. */
|
||||
@ -436,7 +524,9 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
|
||||
real_length = string - token->string.string;
|
||||
skip_sgml_space(scanner, &string);
|
||||
|
||||
if (is_sgml_space(string[-1])) {
|
||||
/* Make '<?xml ' cause the right kind of error. */
|
||||
if (is_sgml_space(string[-1])
|
||||
&& string < scanner->end) {
|
||||
/* We found the end. */
|
||||
possibly_incomplete = 0;
|
||||
}
|
||||
@ -564,9 +654,20 @@ scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *t
|
||||
}
|
||||
}
|
||||
|
||||
if (possibly_incomplete && check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
if (possibly_incomplete) {
|
||||
if (check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner) && string == scanner->end) {
|
||||
unsigned char *end;
|
||||
|
||||
end = get_sgml_error_end(scanner, type, string);
|
||||
token = set_sgml_error(scanner, end);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
token->type = type;
|
||||
@ -599,13 +700,20 @@ scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token
|
||||
}
|
||||
|
||||
if (!string) {
|
||||
/* Makes the next succeed when checking for incompletion. */
|
||||
/* Makes the next succeed when checking for incompletion, and
|
||||
* puts the rest of the text within the token. */
|
||||
string = scanner->end;
|
||||
|
||||
if (check_sgml_incomplete(scanner, string)) {
|
||||
set_sgml_incomplete(scanner, token);
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_sgml_error(scanner)) {
|
||||
token = set_sgml_error(scanner, string);
|
||||
if (!token)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
token->type = SGML_TOKEN_PROCESS_DATA;
|
||||
@ -622,35 +730,34 @@ static struct dom_scanner_token *
|
||||
scan_sgml_tokens(struct dom_scanner *scanner)
|
||||
{
|
||||
struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
|
||||
struct dom_scanner_token *current;
|
||||
|
||||
if (!begin_dom_token_scanning(scanner))
|
||||
return get_dom_scanner_token(scanner);
|
||||
|
||||
/* Scan tokens until we fill the table */
|
||||
for (current = scanner->table + scanner->tokens;
|
||||
current < table_end && scanner->position < scanner->end;
|
||||
current++) {
|
||||
for (scanner->current = scanner->table + scanner->tokens;
|
||||
scanner->current < table_end && scanner->position < scanner->end;
|
||||
scanner->current++) {
|
||||
if (scanner->state == SGML_STATE_ELEMENT
|
||||
|| (*scanner->position == '<'
|
||||
&& scanner->state != SGML_STATE_PROC_INST)) {
|
||||
skip_sgml_space(scanner, &scanner->position);
|
||||
if (scanner->position >= scanner->end) break;
|
||||
|
||||
scan_sgml_element_token(scanner, current);
|
||||
scan_sgml_element_token(scanner, scanner->current);
|
||||
|
||||
/* Shall we scratch this token? */
|
||||
if (current->type == SGML_TOKEN_SKIP) {
|
||||
current--;
|
||||
if (scanner->current->type =< |