1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-09-27 02:56:18 -04:00
This commit is contained in:
Kalle Olavi Niemitalo 2006-01-01 19:05:44 +02:00 committed by Kalle Olavi Niemitalo
commit 345ba7afcd
173 changed files with 9353 additions and 3637 deletions

View File

@ -241,7 +241,7 @@ Jimenez Martinez Angel Luis <al026635@alumail.uji.es>
Jonas Fonseca <fonseca@diku.dk>
Maintainer
Danish translation
http://elinks.or.cz/
http://elinks.cz/
Mailcap support
Major cleanups
BFU and document management hacking
@ -442,7 +442,7 @@ Petr Baudis <pasky@ucw.cz>
Feature of the Day implementation
LEDs, exmode, marks and other worthless toys
Actually even some documentation!
http://elinks.or.cz/ (original cut)
http://elinks.cz/ (original cut)
Petr Cech <cech@atrey.karlin.mff.cuni.cz>
Allow spaces at the beginning of URL

2
BUGS
View File

@ -1,4 +1,4 @@
All the known bugs are now kept in the Bugzilla, available at
http://bugzilla.elinks.or.cz/
http://bugzilla.elinks.cz/

View File

@ -13,7 +13,13 @@ PATHSCRIPT = case '@top_srcdir@' in /*) ;; *) echo $(top_builddir)/;; esac
PATHPREFIX = $(shell $(PATHSCRIPT))
top_srcdir = $(PATHPREFIX)@top_srcdir@
SRCPATH = $(shell cd $(top_srcdir)/$(RELPATH) && pwd)
ifeq ($(SRCPATH),$(CURPATH))
srcdir =
else
srcdir = $(top_srcdir)/$(RELPATH)
endif
prefix = @prefix@
exec_prefix = @exec_prefix@
@ -76,7 +82,6 @@ PYTHON_LIBS = @PYTHON_LIBS@
RANLIB = @RANLIB@
RUBY_CFLAGS = @RUBY_CFLAGS@
RUBY_LIBS = @RUBY_LIBS@
SEE_CFLAGS = @SEE_CFLAGS@
SPARSE = @SPARSE@
SPIDERMONKEY_CFLAGS = @SPIDERMONKEY_CFLAGS@
SPIDERMONKEY_LIBS = @SPIDERMONKEY_LIBS@
@ -134,10 +139,10 @@ CONFIG_RISCOS = @CONFIG_RISCOS@
CONFIG_RUBY = @CONFIG_RUBY@
CONFIG_SCANNER = @CONFIG_SCANNER@
CONFIG_SCRIPTING = @CONFIG_SCRIPTING@
CONFIG_SEE = @CONFIG_SEE@
CONFIG_SHA1 = @CONFIG_SHA1@
CONFIG_SMALL = @CONFIG_SMALL@
CONFIG_SMB = @CONFIG_SMB@
CONFIG_SM_SCRIPTING = @CONFIG_SM_SCRIPTING@
CONFIG_SPIDERMONKEY = @CONFIG_SPIDERMONKEY@
CONFIG_SSL = @CONFIG_SSL@
CONFIG_SYSMOUSE = @CONFIG_SYSMOUSE@
@ -158,6 +163,8 @@ INCLUDES = -I$(top_builddir) -I$(top_srcdir)/src
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CFLAGS) $(CFLAGS)
MAKE_COLOR = @MAKE_COLOR@
### This is here because Makefile.config is usually the first thing
### we get and sometimes the all rule can be implicit, yet we want

View File

@ -14,36 +14,48 @@ else
mquiet = quiet_
endif
# Colorize the build.
#
# Each variable holds a terminal escape sequence produced by tput. They are
# assigned with := so that tput is run once, when this file is parsed,
# instead of being forked again every time a quiet_cmd_* message that
# references a color is expanded. When MAKE_COLOR is not defined the
# variables stay undefined and expand to nothing.
ifdef MAKE_COLOR
INFO_COLOR := $(shell tput setaf 5)
CC_COLOR := $(shell tput setaf 6)
LD_COLOR := $(shell tput setaf 2)
PO_COLOR := $(shell tput setaf 6)
LINK_COLOR := $(shell tput bold;tput setaf 4)
INSTALL_COLOR := $(shell tput setaf 3)
END_COLOR := $(shell tput sgr0)
endif
# Show the command (quiet or non-quiet version based on the assignment
# just above) and then execute it.
ncmd = $(if $($(quiet)cmd_$(1)),echo $($(quiet)cmd_$(1)) &&) $(cmd_$(1))
cmd = @$(if $($(quiet)cmd_$(1)),echo $($(quiet)cmd_$(1)) &&) $(cmd_$(1))
mcmd = @$(if $($(mquiet)cmd_$(1)),echo $($(mquiet)cmd_$(1)) &&) $(cmd_$(1))
ecmd = @$(if $($(mquiet)cmd_$(1)),printf "%-38s " $($(mquiet)cmd_$(1)) &&) $(cmd_$(1))
quiet_cmd_compile = ' [CC] $(RELPATH)$@'
quiet_cmd_compile = ' [$(CC_COLOR)CC$(END_COLOR)] $(RELPATH)$@'
masq_cmd_compile = $(COMPILE) -c $<
cmd_compile = $(COMPILE) -Wp,-MD,.deps/$(*F).pp -c $<
# Rule to compile a set of .o files into one .o file
quiet_cmd_ld_objs = " [LD] $(RELPATH)$@"
quiet_cmd_ld_objs = " [$(LD_COLOR)LD$(END_COLOR)] $(RELPATH)$@"
cmd_ld_objs = $(LD) -r -o $@ $(filter $(OBJS), $^) \
$(foreach subdir,$(sort $(filter-out src,$(SUBDIRS))), \
`test -e $(subdir)/lib.o && echo $(subdir)/lib.o`)
quiet_cmd_link = ' [LINK] $(RELPATH)$@'
quiet_cmd_link = ' [$(LINK_COLOR)LINK$(END_COLOR)] $(RELPATH)$@'
cmd_link = $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
quiet_cmd_sparse = ' [SPARSE] $(RELPATH)$(2)'
cmd_sparse = $(SPARSE) $(DEFS) $(INCLUDES) $(AM_CFLAGS) $(CFLAGS) $(SPARSE_FLAGS) $(2)
# Recursive make
quiet_cmd_recmake = "[MAKE $(3)] $(RELPATH)$(2)"
quiet_cmd_recmake = "[$(INFO_COLOR)MAKE $(3)$(END_COLOR)] $(RELPATH)$(2)"
cmd_recmake = $(MAKE) -C $(2) $(3)
quiet_cmd_installdata = " [INSTALL] $(RELPATH)$(2) -> $(3)"
quiet_cmd_installdata = " [$(INSTALL_COLOR)INSTALL$(END_COLOR)] $(RELPATH)$(2) -> $(3)"
cmd_installdata = $(INSTALL_DATA) $(2) $(3)
quiet_cmd_installprog = " [INSTALL] $(RELPATH)$(2) -> $(3)"
quiet_cmd_installprog = " [$(INSTALL_COLOR)INSTALL$(END_COLOR)] $(RELPATH)$(2) -> $(3)"
cmd_installprog = $(INSTALL_PROGRAM) $(2) $(3)
@ -88,9 +100,15 @@ ifdef INCLUDE_ALL
ifdef SUBDIRS-no
SUBDIRS += $(SUBDIRS-no)
endif
ifdef SUBDIRS-
SUBDIRS += $(SUBDIRS-)
endif
ifdef OBJS-no
OBJS += $(OBJS-no)
endif
ifdef OBJS-
OBJS += $(OBJS-)
endif
endif
@ -113,7 +131,7 @@ init-default:
echo 'include $(SRC)/$(RELPATH)/$(subdir)/Makefile' > $(subdir)/Makefile;)
clean-default:
-test -z "$(CLEAN)" || $(RM) $(CLEAN)
@-test -z "$(CLEAN)" || $(RM) $(CLEAN)
cleanall-default: clean-default

1
NEWS
View File

@ -11,6 +11,7 @@ ELinks now:
* SSL support via GNUTLS now requires 1.2 or higher
* support for Lua 4.x was dropped, we only support Lua 5.x now
* Python scripting back-end (experimental)
* Spidermonkey based ECMAScript scripting back-end (experimental)
* 88 colors support
* default URI-rewrite rule, used when no other rules match but the string that
was entered in the Go to URL box does not resemble a URI

8
README
View File

@ -8,7 +8,7 @@ on a variety of platforms.
The ELinks official website is available at
http://elinks.or.cz/
http://elinks.cz/
Please see the SITES file for mirrors or other recommended sites. If you
want to install ELinks on your computer, see the INSTALL file for further
@ -18,8 +18,8 @@ A good start point is documentation files available in doc/, especially the
file named index.txt.
If you want to request features or report bugs, see community information at
http://elinks.or.cz/community.html and feedback information available at
http://elinks.or.cz/feedback.html.
http://elinks.cz/community.html and feedback information available at
http://elinks.cz/feedback.html.
If you want to write some patches, please first read the doc/hacking.txt
document.
@ -55,7 +55,7 @@ separation of add-on modules (like cookies, bookmarks, ssl, scripting etc).
For more details about ELinks history, please see
http://elinks.or.cz/history.html
http://elinks.cz/history.html
If you are more interested in the history and various Links clones and versions,
you can examine the website at

3
SITES
View File

@ -10,9 +10,6 @@ RPMs:
http://ftp.nest.pld-linux.org/pool/e/elinks/
http://ftp.pld-linux.org/pool/e/elinks/
Cygwin binaries:
http://www.pervalidus.net/cygwin/links/
RISC OS binaries:
http://www.riscos.info/unix/indexes/browser.html

2
THANKS
View File

@ -12,6 +12,6 @@ Thanks to Gerard Beekmans <gerard@linuxfromscratch.org> for providing us the
resources for our mailing lists at linuxfromscratch.org servers.
Thanks to Jan Sembera <jan.sembera@flexible.cz> for providing us the space and
bandwidth on his server for elinks.or.cz and cvs.elinks.or.cz.
bandwidth on his server for elinks.cz and cvs.elinks.cz.
Thanks to Zdenek Burda for donating the elinks.cz domain.

2
TODO
View File

@ -1,4 +1,4 @@
The ELinks TODO items are kept in Bugzilla (http://bugzilla.elinks.or.cz/),
The ELinks TODO items are kept in Bugzilla (http://bugzilla.elinks.cz/),
look for the bugs with 'enhancement' severity. Some generic goals are still
kept here, though, to be more on the developers' eyes ;-).

4
Unicode/.gitignore vendored
View File

@ -1,4 +0,0 @@
Makefile
Makefile.in
.deps
*.swp

View File

@ -1,48 +0,0 @@
dnl EL_CONFIG_SEE
dnl Handle --with-see. When the option is given with any value other than
dnl "no", look for the libsee-config program and, if it is found, take the
dnl linker and preprocessor flags from it, enable CONFIG_SEE and substitute
dnl SEE_CFLAGS. Without the option nothing is probed.
AC_DEFUN([EL_CONFIG_SEE],
[
enable_see="no";
AC_ARG_WITH(see, [ --with-see enable Simple Ecmascript Engine (SEE) support],
[ if test "x$withval" != xno; then enable_see=yes; fi ])
# The following is probably bad, ugly and so on. Stolen from Guile's (1.4)
# SEE_FLAGS but I really don't want to require people to have Guile in order
# to compile CVS. Also, the macro seems to be really stupid regarding searching
# for Guile in $PATH etc. --pasky
# NOTE(review): the Guile wording above looks like a leftover from the code
# this macro was copied from -- confirm and reword.
AC_MSG_CHECKING([for SEE])
if test "$enable_see" = "yes"; then
# This "yes" only reports that SEE was requested. The probe for
# libsee-config happens below and can still fail.
AC_MSG_RESULT(yes);
## Based on the SEE_FLAGS macro.
# If the option value is a directory, search it before PATH.
if test -d "$withval"; then
SEE_PATH="$withval:$PATH"
else
SEE_PATH="$PATH"
fi
AC_PATH_PROG(SEE_CONFIG, libsee-config, no, $SEE_PATH)
## First, check whether libsee-config was found at all.
if test "$SEE_CONFIG" != no; then
cf_result="yes";
SEE_LIBS="`$SEE_CONFIG --libs`"
SEE_CFLAGS="`$SEE_CONFIG --cppflags`"
LIBS="$SEE_LIBS $LIBS"
CPPFLAGS="$CPPFLAGS $SEE_CFLAGS"
EL_CONFIG(CONFIG_SEE, [SEE])
AC_SUBST(SEE_CFLAGS)
else
# Not found: fatal when the option value is non-empty and not "no",
# otherwise only a warning.
if test -n "$withval" && test "x$withval" != xno; then
AC_MSG_ERROR([SEE not found])
else
AC_MSG_WARN([SEE support disabled])
fi
fi
else
AC_MSG_RESULT(no);
fi
])

View File

@ -21,7 +21,7 @@ do
esac
if test ! -d "$pathcomp"; then
echo "mkdir $pathcomp"
#echo "mkdir $pathcomp"
mkdir "$pathcomp" || lasterr=$?

View File

@ -5,7 +5,7 @@ AC_INIT(src/main/main.c)
AC_CONFIG_AUX_DIR(config)
PACKAGE=elinks
VERSION=0.11rc0.GIT
VERSION=0.12.GIT
AC_SUBST(PACKAGE)
AC_SUBST(VERSION)
AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Package version])
@ -496,6 +496,63 @@ if test "$CONFIG_WIN32" = yes; then
EL_CONFIG_WIN32
fi
dnl ===================================================================
dnl Check for SpiderMonkey, optional even if installed.
dnl ===================================================================
dnl --without-spidermonkey (or --with-spidermonkey=no) sets
dnl disable_spidermonkey and skips the probe below entirely.
AC_ARG_WITH(spidermonkey, [ --without-spidermonkey disable SpiderMonkey Mozilla JavaScript engine support],
[if test "$withval" = no; then disable_spidermonkey=yes; fi])
AC_MSG_CHECKING([for SpiderMonkey])
dnl NOTE(review): EL_SAVE_FLAGS presumably stores the current CFLAGS,
dnl CPPFLAGS and LIBS in the *_X variables used below -- confirm in its
dnl definition.
EL_SAVE_FLAGS
cf_result=no
if test -z "$disable_spidermonkey"; then
dnl An option value that is not an existing directory is ignored.
if test ! -d "$withval"; then
withval="";
fi
dnl Try every combination of install prefix, include subdirectory and
dnl library name until one links. Once cf_result is yes the remaining
dnl iterations do nothing.
for spidermonkeydir in "$withval" "" /usr /usr/local /opt/spidermonkey /opt/js; do
for spidermonkeyinclude in "/include" "/include/js" "/include/smjs"; do
for spidermonkeylib in js smjs; do
if test "$cf_result" = no; then
SPIDERMONKEY_LIBS="-l$spidermonkeylib"
dnl NOTE(review): SPIDERMONKEY_CFLAGS is only assigned when the prefix is
dnl non-empty, so an iteration with an empty prefix reuses whatever value an
dnl earlier iteration left behind.
if test ! -z "$spidermonkeydir"; then
SPIDERMONKEY_LIBS="-L$spidermonkeydir/lib $SPIDERMONKEY_LIBS"
SPIDERMONKEY_CFLAGS="-I$spidermonkeydir$spidermonkeyinclude"
fi
LIBS="$SPIDERMONKEY_LIBS $LIBS_X"
CFLAGS="$CFLAGS_X $SPIDERMONKEY_CFLAGS"
CPPFLAGS="$CPPFLAGS_X $SPIDERMONKEY_CFLAGS"
AC_TRY_LINK([#define XP_UNIX
#include <jsapi.h>],
[JS_GetImplementationVersion()],
cf_result=yes, cf_result=no)
fi
done
done
done
fi
AC_MSG_RESULT($cf_result)
dnl On failure the saved flags are restored. On success LIBS and CPPFLAGS
dnl keep the SpiderMonkey additions and only CFLAGS is reset to its saved
dnl value.
if test "$cf_result" != yes; then
EL_RESTORE_FLAGS
else
EL_CONFIG(CONFIG_SPIDERMONKEY, [SpiderMonkey])
CFLAGS="$CFLAGS_X"
AC_SUBST(SPIDERMONKEY_LIBS)
AC_SUBST(SPIDERMONKEY_CFLAGS)
fi
AC_SUBST(CONFIG_SPIDERMONKEY)
EL_CONFIG_DEPENDS(CONFIG_ECMASCRIPT, [CONFIG_SPIDERMONKEY], [ECMAScript (JavaScript)])
dnl ===================================================================
dnl Check for Guile, optional even if installed.
dnl ===================================================================
@ -725,82 +782,35 @@ dnl ===================================================================
EL_CONFIG_RUBY
dnl ===================================================================
dnl Check for SEE, optional even if installed.
dnl Optional Spidermonkey-based ECMAScript browser scripting
dnl ===================================================================
EL_CONFIG_SEE
dnl When the option is passed, any value other than "no" is normalized to
dnl "yes" and stored in CONFIG_SM_SCRIPTING.
dnl NOTE(review): when the option is not passed this block leaves
dnl CONFIG_SM_SCRIPTING untouched; presumably a default is set elsewhere --
dnl confirm, since the help text advertises --disable.
AC_ARG_ENABLE(sm-scripting,
[ --disable-sm-scripting ECMAScript browser scripting (requires Spidermonkey)],
[if test "$enableval" != no; then enableval="yes"; fi
CONFIG_SM_SCRIPTING="$enableval";])
dnl Browser scripting is enabled only when SpiderMonkey itself was detected
dnl and CONFIG_SM_SCRIPTING is "yes". In every other case it is forced to "no".
if test "x$CONFIG_SPIDERMONKEY" = xyes &&
test "x$CONFIG_SM_SCRIPTING" = xyes; then
EL_CONFIG(CONFIG_SM_SCRIPTING, [SpiderMonkey])
else
CONFIG_SM_SCRIPTING=no
fi
dnl ===================================================================
dnl Setup global scripting
dnl ===================================================================
EL_CONFIG_DEPENDS(CONFIG_SCRIPTING, [CONFIG_GUILE CONFIG_LUA CONFIG_PERL CONFIG_PYTHON CONFIG_RUBY CONFIG_SEE], [Scripting])
EL_CONFIG_DEPENDS(CONFIG_SCRIPTING, [CONFIG_GUILE CONFIG_LUA CONFIG_PERL CONFIG_PYTHON CONFIG_RUBY CONFIG_SM_SCRIPTING], [Browser scripting])
AC_SUBST(CONFIG_GUILE)
AC_SUBST(CONFIG_LUA)
AC_SUBST(CONFIG_PERL)
AC_SUBST(CONFIG_PYTHON)
AC_SUBST(CONFIG_RUBY)
AC_SUBST(CONFIG_SEE)
AC_SUBST(CONFIG_SM_SCRIPTING)
AC_SUBST(CONFIG_SCRIPTING)
dnl ===================================================================
dnl Check for SpiderMonkey, optional even if installed.
dnl ===================================================================
AC_ARG_WITH(spidermonkey, [ --without-spidermonkey disable SpiderMonkey Mozilla JavaScript engine support],
[if test "$withval" = no; then disable_spidermonkey=yes; fi])
AC_MSG_CHECKING([for SpiderMonkey])
EL_SAVE_FLAGS
cf_result=no
if test -z "$disable_spidermonkey"; then
if test ! -d "$withval"; then
withval="";
fi
for spidermonkeydir in "$withval" "" /usr /usr/local /opt/spidermonkey /opt/js; do
for spidermonkeyinclude in "/include" "/include/js" "/include/smjs"; do
for spidermonkeylib in js smjs; do
if test "$cf_result" = no; then
SPIDERMONKEY_LIBS="-l$spidermonkeylib"
if test ! -z "$spidermonkeydir"; then
SPIDERMONKEY_LIBS="-L$spidermonkeydir/lib $SPIDERMONKEY_LIBS"
SPIDERMONKEY_CFLAGS="-I$spidermonkeydir$spidermonkeyinclude"
fi
LIBS="$SPIDERMONKEY_LIBS $LIBS_X"
CFLAGS="$CFLAGS_X $SPIDERMONKEY_CFLAGS"
CPPFLAGS="$CPPFLAGS_X $SPIDERMONKEY_CFLAGS"
AC_TRY_LINK([#define XP_UNIX
#include <jsapi.h>],
[JS_GetImplementationVersion()],
cf_result=yes, cf_result=no)
fi
done
done
done
fi
AC_MSG_RESULT($cf_result)
if test "$cf_result" != yes; then
EL_RESTORE_FLAGS
else
EL_CONFIG(CONFIG_SPIDERMONKEY, [SpiderMonkey])
CFLAGS="$CFLAGS_X"
AC_SUBST(SPIDERMONKEY_LIBS)
AC_SUBST(SPIDERMONKEY_CFLAGS)
fi
AC_SUBST(CONFIG_SPIDERMONKEY)
EL_CONFIG_DEPENDS(CONFIG_ECMASCRIPT, [CONFIG_SPIDERMONKEY], [ECMAScript (JavaScript)])
dnl ===================================================================
dnl Check for SSL support.
dnl ===================================================================
@ -1227,7 +1237,7 @@ if test "x$ac_cv_prog_gcc" = "xyes"; then
CFLAGS="$CFLAGS -Werror"
fi
case "`gcc -dumpversion`" in
case "`$CC -dumpversion`" in
3.0|3.1|3.2)
# These should be ok using -Werror
;;
@ -1257,6 +1267,15 @@ ALL_CFLAGS="$CFLAGS $CPPFLAGS"
EL_LOG_CONFIG(ALL_CFLAGS, [Compiler options (CFLAGS)], [])
EL_LOG_CONFIG(LIBS, [Linker options (LIBS)], [])
dnl ===================================================================
dnl Colored make output
dnl ===================================================================
dnl Enable colored make output only when tput reports at least 4 colors.
dnl tput is run directly instead of executing whatever `which tput` prints,
dnl its errors are discarded, and an empty or non-numeric answer counts as
dnl 0, so a missing tput or an unknown terminal simply leaves MAKE_COLOR
dnl unset instead of making test fail with a syntax error. Backquotes are
dnl used because the dollar-parenthesis form is not understood by every
dnl Bourne shell that configure may run under.
tput_colors=`(tput colors) 2>/dev/null`
if test "${tput_colors:-0}" -ge 4 2>/dev/null; then
	MAKE_COLOR=1
fi
dnl AC_SUBST registers the substitution at m4 time wherever it appears, so it
dnl is kept outside the shell conditional to make that explicit.
AC_SUBST(MAKE_COLOR)
dnl ===================================================================
dnl Generated files
dnl ===================================================================

View File

@ -37,8 +37,8 @@ License: GPL
Vendor: ELinks project <elinks-users@linuxfromscratch.org>
Packager: Petr Baudis <pasky@ucw.cz>
Group: Applications/Internet
Source: http://elinks.or.cz/download/%{name}-%{version}.tar.bz2
URL: http://elinks.or.cz/
Source: http://elinks.cz/download/%{name}-%{version}.tar.bz2
URL: http://elinks.cz/
BuildRequires: bzip2-devel
BuildRequires: expat-devel
BuildRequires: gpm-devel
@ -133,6 +133,9 @@ rm -rf $RPM_BUILD_ROOT
# date +"%a %b %d %Y"
%changelog
* Thu Dec 29 2005 Miciah Dashiel Butler Masters <mdm0304@ecu.edu>
- elinks.or.cz -> elinks.cz
*Tue Jun 14 2005 Witold Filipczyk <witekfl@pld-linux.org>
- removed unused texi2html dependency
- removed unused libdir directory

View File

@ -9,15 +9,22 @@ if [ -z "`which wget 2>/dev/null`" ]; then
exit 1
fi
[ -d .git ] && cd .git
[ "$GIT_DIR" ] || GIT_DIR=.git
if ! [ -d "$GIT_DIR" ]; then
echo "Error: You must run this from the project root (or set GIT_DIR to your .git directory)." >&2
exit 1
fi
cd "$GIT_DIR"
echo "[grafthistory] Downloading the history"
mkdir -p objects/pack
cd objects/pack
wget -c http://elinks.or.cz/elinks-history.git/objects/pack/pack-0d6c5c67aab3b9d5d9b245da5929c15d79124a48.idx
wget -c http://elinks.or.cz/elinks-history.git/objects/pack/pack-0d6c5c67aab3b9d5d9b245da5929c15d79124a48.pack
wget -c http://elinks.cz/elinks-history.git/objects/pack/pack-0d6c5c67aab3b9d5d9b245da5929c15d79124a48.idx
wget -c http://elinks.cz/elinks-history.git/objects/pack/pack-0d6c5c67aab3b9d5d9b245da5929c15d79124a48.pack
echo "[grafthistory] Setting up the grafts"
cd ../..
mkdir -p info
# master
echo 0f6d4310ad37550be3323fab80456e4953698bf0 06135dc2b8bb7ed2e441305bdaa82048396de633 >>info/grafts
# REL_0_10

View File

@ -171,8 +171,8 @@ end
dumbprefixes = {
arc = "http://web.archive.org/web/*/%c",
b = "http://babelfish.altavista.com/babelfish/tr",
bz = "http://bugzilla.elinks.or.cz",
bug = "http://bugzilla.elinks.or.cz",
bz = "http://bugzilla.elinks.cz",
bug = "http://bugzilla.elinks.cz",
d = "http://www.dict.org",
g = "http://www.google.com/",
gg = "http://www.google.com/",
@ -225,7 +225,7 @@ function cvsweb (base, project, url)
string.gsub(url, "([^%s]+)", function (w) table.insert(t, w) end)
file, old, new = t[1], t[2], t[3]
if t[4] then error('this smartprefix takes only two to three arguments') return nil end
if t[4] then error('this smartprefix takes only one to three arguments') return nil end
if not file then error('no file given') return nil end
if new then return base..project.."/"..file..".diff?r1="..old.."&r2="..new.."&f=u"
@ -263,12 +263,12 @@ end
smartprefixes = {
arc = "http://web.archive.org/web/*/%s",
bug = function (url) return bugzilla('http://bugzilla.elinks.or.cz/', url) end,
bug = function (url) return bugzilla('http://bugzilla.elinks.cz/', url) end,
cambridge = "http://dictionary.cambridge.org/results.asp?searchword=%s",
cliki = "http://www.cliki.net/admin/search?words=%s",
-- If you want to add a smartprefix for another project's CVSweb,
-- just create a lambda like this. Aren't high-level languages fun?
cvs = function (x) return cvsweb ("http://cvsweb.elinks.or.cz/cvsweb.cgi/", "elinks", x) end,
cvs = function (x) return cvsweb ("http://cvsweb.elinks.cz/cvsweb.cgi/", "elinks", x) end,
d = "http://www.dict.org/bin/Dict?Query=%s&Form=Dict1&Strategy=*&Database=*&submit=Submit+query",
debcontents = debian_contents,
debfile = debian_file,

View File

@ -135,41 +135,42 @@ B<bugmenot> or B<bn>
{
($current_url) = $current_url =~ /^.*:\/\/(.*)/;
my $bugmenot = 'http://bugmenot.com/view.php?url=' . $current_url;
my $tempfile = $ENV{'HOME'} . '/.elinks/elinks';
my $matrix = '1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
for (0..int(rand(7) + 9))
{
$tempfile = $tempfile . substr($matrix, (length($matrix) - 1) - rand(length($matrix) + 1), 1);
}
my ($message, $login, $password);
system('elinks -no-home -source "' . $bugmenot . '" >' . $tempfile . ' 2>/dev/null');
open FILE, "<$tempfile" or return $bugmenot;
$message = <FILE>;
while (<FILE>)
{
next unless (m/^<dd>(.*)<br \/>(.*)<\/dd><\/dl>$/);
$login = $1;
$password = $2;
}
$login =~ s/(^\s*|\n|\s*$)//g if $login;
$password =~ s/(^\s*|\n|\s*$)//g if $password;
close FILE;
unlink $tempfile;
return $bugmenot unless $message =~ /[a-z]+/ and $message !~ /404/;
unless ($message =~ s/.*(No accounts found\.).*/${1}/)
{
if ($login and $password)
{
$message = "Login: " . $login . "\nPassword: " . $password;
}
else
{
$message = 'No accounts found';
}
}
system('elinks -remote "infoBox\(' . $message . ')" >/dev/null 2>&1 &');
return $current_url; #FIXME
# return;
#my $tempfile = $ENV{'HOME'} . '/.elinks/elinks';
#my $matrix = '1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
#for (0..int(rand(7) + 9))
#{
#$tempfile = $tempfile . substr($matrix, (length($matrix) - 1) - rand(length($matrix) + 1), 1);
#}
#my ($message, $login, $password);
#system('elinks -no-home -source "' . $bugmenot . '" >' . $tempfile . ' 2>/dev/null');
#open FILE, "<$tempfile" or return $bugmenot;
#$message = <FILE>;
#while (<FILE>)
#{
#next unless (m/^<dd>(.*)<br \/>(.*)<\/dd><\/dl>$/);
#$login = $1;
#$password = $2;
#}
#$login =~ s/(^\s*|\n|\s*$)//g if $login;
#$password =~ s/(^\s*|\n|\s*$)//g if $password;
#close FILE;
#unlink $tempfile;
#return $bugmenot unless $message =~ /[a-z]+/ and $message !~ /404/;
#unless ($message =~ s/.*(No accounts found\.).*/${1}/)
#{
#if ($login and $password)
#{
#$message = "Login: " . $login . "\nPassword: " . $password;
#}
#else
#{
#$message = 'No accounts found';
#}
#}
#system('elinks -remote "infoBox\(' . $message . ')" >/dev/null 2>&1 &');
#return $current_url; #FIXME
##return;
return $bugmenot . $current_url;
}
@ -896,7 +897,7 @@ There's no place like home...
my ($bug) = $url =~ /^.* (.*)/;
if ($url =~ '^b')
{
my $bugzilla = 'http://bugzilla.elinks.or.cz';
my $bugzilla = 'http://bugzilla.elinks.cz';
if (not $bug)
{
if (loadrc("email"))
@ -922,7 +923,7 @@ There's no place like home...
$doc = '/documentation' if $url =~ '^doc';
$doc = '/faq.html' if $url =~ '^(faq|help)$';
$doc = '/documentation/html/manual.html' if $url =~ '^manual$';
return 'http://elinks.or.cz' . $doc;
return 'http://elinks.cz' . $doc;
}
}

View File

@ -1,220 +0,0 @@
/* Hooks for the ELinks SEE browser scripting
*
* Copyright (c) Jonas Fonseca, 2005
*/
// Quit hook stub. The body contains only a disabled alert call, so the
// function currently does nothing.
// NOTE(review): presumably called by ELinks when the browser exits -- confirm.
function quit()
{
// alert("quiting ... " + navigator.appVersion);
}
/*********************************************************************
* goto_url(url, current_url)
*********************************************************************/
// Handlers consulted, in registration order, by goto_url().
var goto_url_hooks = []

// Offer the typed URL to each registered handler in turn. A handler gets a
// mutable context ({url, current_url}) plus the raw current_url argument and
// returns a true value to stop the chain. The (possibly rewritten)
// context.url is the result.
function goto_url(url, current_url)
{
	var context = { url: url, current_url: current_url || "" }
	var idx = 0

	while (idx < goto_url_hooks.length) {
		var handled = goto_url_hooks[idx](context, current_url)
		if (handled)
			return context.url
		idx += 1
	}

	return context.url
}
// Don't take localhost as directory name.
//
// Rewrites input that starts with the bare host name ("localhost",
// "localhost:8080", "localhost/path") to "http://<input>/" and returns true.
// The match is anchored at the start and must end at ":", "/" or the end of
// the input, so a complete URL such as "http://localhost/" or an unrelated
// URL that merely contains the word is left alone and false is returned.
function expand_localhost(context)
{
	if (!/^localhost([:\/]|$)/.test(context.url))
		return false

	context.url = "http://" + context.url + "/"
	return true
}
goto_url_hooks.push(expand_localhost)
// Dumb prefixes: typing one of these keys (for example "gg") as the whole
// input of the Go to URL dialog goes to the mapped address (google.com for
// "gg"). A "%c" in the mapped address is replaced with the URL of the
// current document by expand_dumbprefix().
// Note that this is obsoleted by the URI rewrite plugin.
var dumbprefixes = {
arc: "http://web.archive.org/web/*/%c",
b: "http://babelfish.altavista.com/babelfish/tr",
bz: "http://bugzilla.elinks.or.cz",
bug: "http://bugzilla.elinks.or.cz",
d: "http://www.dict.org",
g: "http://www.google.com/",
gg: "http://www.google.com/",
go: "http://www.google.com/",
fm: "http://www.freshmeat.net/",
sf: "http://www.sourceforge.net/",
dbug: "http://bugs.debian.org/",
dpkg: "http://packages.debian.org/",
pycur: "http://www.python.org/doc/current/",
pydev: "http://www.python.org/dev/doc/devel/",
pyhelp: "http://starship.python.net/crew/theller/pyhelp.cgi",
pyvault: "http://www.vex.net/parnassus/",
e2: "http://www.everything2.org/",
sd: "http://www.slashdot.org/",
vhtml: "http://validator.w3.org/check?uri=%c",
vcss: "http://jigsaw.w3.org/css-validator/validator?uri=%c"
}
// goto_url hook: if the whole input is a key of the dumbprefixes table,
// replace it with the mapped address and return true; otherwise return false
// and leave the context untouched.
//
// Only the table's own keys count, so input such as "toString" or
// "constructor" is not mistaken for a prefix. The first "%c" in the mapped
// address is replaced with current_url, or with the empty string when no
// current URL was passed. The replacement goes through a function so that
// "$" sequences inside the URL are inserted literally.
function expand_dumbprefix(context, current_url)
{
	if (!Object.prototype.hasOwnProperty.call(dumbprefixes, context.url))
		return false

	var here = current_url ? current_url : ""

	context.url = dumbprefixes[context.url].replace(/%c/, function () {
		return here
	})
	return true
}
goto_url_hooks.push(expand_dumbprefix)
// Smartprefix handler for Gmane searches. The argument has the form
// "<group> <words...>". Returns the search URL, or null when the argument
// does not contain both a group and at least one search word (including the
// case where there is no whitespace at all, which used to raise a TypeError
// on the null match).
function gmane(url)
{
	var match = url.match(/([^\s]+)\s+(.*)$/)

	if (!match) return null

	var group = match[1]
	var words = match[2]

	if (!words) return null

	return "http://search.gmane.org/search.php?query=" + words + "&group=" + group
}
// Build a smartprefix handler for the gitweb instance at base_url.
//
// The returned function takes the text typed after the prefix. When it
// starts with one of the known actions (search, summary, shortlog, log,
// commit, commitdiff, tree) that action is selected; anything else falls
// back to the summary page instead of failing on a null match. For "search"
// the remaining text is appended, escaped, as the search string.
function gitweb(base_url)
{
	// The parameter is not called "arguments" so that it does not shadow
	// the built-in object of that name.
	return function (args) {
		var match = args.match(/^(search|summary|shortlog|log|commit|commitdiff|tree)(\s(.*))?/)
		var action = match ? match[1] : 'summary'
		var url = base_url + ';a=' + action

		if (action == 'search' && match[3])
			url += ';s=' + escape(match[3])

		return url
	}
}
// Build a smartprefix handler for the Bugzilla installation at base_url.
// The handler maps an empty argument to the front page, an all-digit
// argument to that bug's page, and anything else to a summary search.
function bugzilla (base_url)
{
	return function (query) {
		var nothing_typed = !query || query == ''

		if (nothing_typed)
			return base_url

		var is_bug_number = query.match(/^[\d]+$/)

		if (is_bug_number)
			return base_url + 'show_bug.cgi?id=' + query

		var wanted = escape(query)

		return base_url + 'buglist.cgi?short_desc_type=allwordssubstr' + '&short_desc=' + wanted
	}
}
// Smart prefixes: "<prefix> <text>" or "<prefix>:<text>" typed in the Go to
// URL dialog is expanded by expand_smartprefix(). A string entry has its
// "%s" replaced with the escaped text; a function entry is called with the
// raw text and returns the URL.
//
// The "cliki" and "d" entries use "words=%s" and "Query=%s"; they previously
// read "words: %s" and "Query: %s", which produced broken query strings.
var smartprefixes = {
	arc: "http://web.archive.org/web/*/%s",
	bug: bugzilla('http://bugzilla.elinks.or.cz/'),
	cambridge: "http://dictionary.cambridge.org/results.asp?searchword=%s",
	cliki: "http://www.cliki.net/admin/search?words=%s",
	d: "http://www.dict.org/bin/Dict?Query=%s&Form=Dict1&Strategy=*&Database=*&submit=Submit+query",
	dmoz: "http://search.dmoz.org/cgi-bin/search?search=%s",
	foldoc: "http://wombat.doc.ic.ac.uk/foldoc/foldoc.cgi?%s",
	g: "http://www.google.com/search?q=%s&btnG=Google+Search",
	gd: "http://www.google.com/search?q=%s&cat=gwd/Top",
	gg: "http://www.google.com/search?q=%s&btnG=Google+Search",
	// Whose idea was it to use 'gg' for websearches? -- Miciah
	//gg = "http://groups.google.com/groups?q=%s",
	gi: "http://images.google.com/images?q=%s",
	gitweb: gitweb("http://pasky.or.cz/gitweb.cgi?p=elinks.git"),
	gmane: gmane,
	gn: "http://news.google.com/news?q=%s",
	go: "http://www.google.com/search?q=%s&btnG=Google+Search",
	gwho: "http://www.googlism.com/?ism=%s&name=1",
	gwhat: "http://www.googlism.com/?ism=%s&name=2",
	gwhere: "http://www.googlism.com/?ism=%s&name=3",
	gwhen: "http://www.googlism.com/?ism=%s&name=4",
	fm: "http://www.freshmeat.net/search/?q=%s",
	savannah: "http://savannah.nongnu.org/search/?words=%s&type_of_search=soft&exact=1",
	sf: "http://sourceforge.net/search/?q=%s",
	sfp: "http://sourceforge.net/projects/%s",
	sd: "http://www.slashdot.org/search.pl?query=%s",
	sdc: "http://www.slashdot.org/search.pl?query=%s&op=comments",
	sdu: "http://www.slashdot.org/search.pl?query=%s&op=users",
	sdp: "http://www.slashdot.org/search.pl?query=%s&op=polls",
	sdj: "http://www.slashdot.org/search.pl?query=%s&op=journals",
	dbug: "http://bugs.debian.org/%s",
	dpkg: "http://packages.debian.org/%s",
	emacs: "http://www.emacswiki.org/cgi-bin/wiki.pl?search=%s",
	lyrics: "http://music.lycos.com/lyrics/results.asp?QT=L&QW=%s",
	lxr: "http://lxr.linux.no/ident?i=%s",
	leo: "http://dict.leo.org/?search=%s",
	onelook: "http://onelook.com/?w=%s&ls=a",
	py: "http://starship.python.net/crew/theller/pyhelp.cgi?keyword=%s&version=current",
	pydev: "http://starship.python.net/crew/theller/pyhelp.cgi?keyword=%s&version=devel",
	pyvault: "http://py.vaults.ca/apyllo.py?find=%s",
	e2: "http://www.everything2.org/?node=%s",
	encz: "http://www.slovnik.cz/bin/ecd?ecd_il=1&ecd_vcb=%s&ecd_trn=translate&ecd_trn_dir=0&ecd_lines=15&ecd_hptxt=0",
	czen: "http://www.slovnik.cz/bin/ecd?ecd_il=1&ecd_vcb=%s&ecd_trn=translate&ecd_trn_dir=1&ecd_lines=15&ecd_hptxt=0",
	dict: "http://dictionary.reference.com/search?q=%s",
	thes: "http://thesaurus.reference.com/search?q=%s",
	a: "http://acronymfinder.com/af-query.asp?String=exact&Acronym=%s",
	imdb: "http://imdb.com/Find?%s",
	mw: "http://www.m-w.com/cgi-bin/dictionary?book=Dictionary&va=%s",
	mwt: "http://www.m-w.com/cgi-bin/thesaurus?book=Thesaurus&va=%s",
	whatis: "http://uptime.netcraft.com/up/graph/?host=%s",
	wiki: "http://www.wikipedia.org/w/wiki.phtml?search=%s",
	wn: "http://www.cogsci.princeton.edu/cgi-bin/webwn1.7.1?stage=1&word=%s",
	// rfc by number
	rfc: "http://www.rfc-editor.org/rfc/rfc%s.txt",
	// rfc search
	rfcs: "http://www.rfc-editor.org/cgi-bin/rfcsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	cr: "http://www.rfc-editor.org/cgi-bin/rfcsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	// Internet Draft search
	rfcid: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	urbandict: "http://www.urbandictionary.com/define.php?term=%s",
	id: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	draft: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25"
}
// goto_url hook: expand "<prefix> <text>" or "<prefix>:<text>" using the
// smartprefixes table. Returns true and stores the new URL in context.url
// when the prefix is known; returns false otherwise.
//
// Only the table's own keys are considered, so input such as
// "constructor foo" or "toString: x" is no longer treated as a smartprefix
// through properties inherited from Object.prototype. A string entry has its
// first "%s" replaced with the escaped text; a function entry is called with
// the raw text. Any other entry type is reported with alert().
function expand_smartprefix(context, current_url)
{
	var match = context.url.match(/^([^:\s]+)(:|\s)\s*(.*)\s*$/)

	if (!match || !match[1] || !match[3])
		return false

	var nick = match[1]
	var val = match[3]

	if (!Object.prototype.hasOwnProperty.call(smartprefixes, nick))
		return false

	// Empty or otherwise false entries are treated as absent.
	if (!smartprefixes[nick])
		return false

	if (typeof smartprefixes[nick] == 'string') {
		context.url = smartprefixes[nick].replace(/%s/, escape(val))
		return true
	}

	if (typeof smartprefixes[nick] == 'function') {
		context.url = smartprefixes[nick](val)
		return true
	}

	alert('smartprefix "' + nick + '" has unsupported type "' + typeof smartprefixes[nick] + '".')
	return false
}
goto_url_hooks.push(expand_smartprefix)

75
contrib/smjs/hooks.js Normal file
View File

@ -0,0 +1,75 @@
/* These are examples for the ELinks SpiderMonkey scripting interface.
* Place choice parts in a file named "hooks.js" in your ELinks configuration
* directory (~/.elinks).
*/
/* Bind "@" in the main keymap to a function that sets elinks.location to
 * the current location with "/.." appended. */
elinks.keymaps.main["@"] = function () {
elinks.location = elinks.location + "/..";
};
/* Functions called, in registration order, by elinks.preformat_html. */
elinks.preformat_html_hooks = new Array();

/* Call every registered preformat hook with the cached document. Stops and
 * returns false as soon as one hook returns a false value; returns true when
 * all hooks accepted. The array is walked by index rather than with for-in
 * so that hooks run in the order they were pushed and nothing other than
 * the array elements is ever called. */
elinks.preformat_html = function (cached) {
	for (var i = 0; i < elinks.preformat_html_hooks.length; i++)
		if (!elinks.preformat_html_hooks[i](cached))
			return false;

	return true;
};
/* Functions called, in registration order, by elinks.goto_url_hook. */
elinks.goto_url_hooks = new Array();

/* Pass the typed URL through every registered hook; each hook receives the
 * value returned by the previous one. A hook returning exactly false stops
 * the chain and false is returned. The array is walked by index rather than
 * with for-in because the result depends on the order in which the hooks
 * are applied. */
elinks.goto_url_hook = function (url) {
	for (var i = 0; i < elinks.goto_url_hooks.length; i++) {
		url = elinks.goto_url_hooks[i](url);
		if (false === url) return false;
	}

	return url;
};
/* Functions called, in registration order, by elinks.follow_url_hook. */
elinks.follow_url_hooks = new Array();

/* Pass the URL of a followed link through every registered hook; each hook
 * receives the value returned by the previous one. A hook returning exactly
 * false stops the chain and false is returned. The array is walked by index
 * rather than with for-in because the result depends on the order in which
 * the hooks are applied. */
elinks.follow_url_hook = function (url) {
	for (var i = 0; i < elinks.follow_url_hooks.length; i++) {
		url = elinks.follow_url_hooks[i](url);
		if (false === url) return false;
	}

	return url;
};
/* Example preformat hook: replace every "root" in the cached document's
 * content with "w00t". Always returns true. */
function root_w00t(cached) {
	var pattern = /root/g;
	var rewritten = cached.content.replace(pattern, "w00t");

	cached.content = rewritten;

	return true;
}
elinks.preformat_html_hooks.push(root_w00t);
/* Example preformat hook: on documents served from a debian.org host, or
 * whose URI contains "changelog.Debian", turn the bug numbers of every
 * "Closes:" list into links to bugs.debian.org. Always returns true. */
function mangle_deb_bugnumbers(cached) {
	/* Wrap each run of digits in one matched "Closes:" list in a link. */
	function link_bug_numbers(closes_text) {
		var number = /([0-9]+)/g;
		var anchor = '<a href="http://bugs.debian.org/$1">$1</a>';

		return closes_text.replace(number, anchor);
	}

	if (cached.uri.match(/^[a-z0-9]+:\/\/[a-z0-9A-Z.-]+debian\.org/)
	    || cached.uri.match(/changelog\.Debian/)) {
		/* Debian Policy Manual 4.4 footnote 16 */
		var closes_list = /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/gi;

		cached.content = cached.content.replace(closes_list, link_bug_numbers);
	}

	return true;
}
elinks.preformat_html_hooks.push(mangle_deb_bugnumbers);
/* Example follow-URL hook: refuse to follow links whose URI contains
 * "pr0n". A blocked URI shows an alert and yields the empty string.
 *
 * Any other URI is returned unchanged. elinks.follow_url_hook hands each
 * hook's return value to the next hook as the URL, so returning true here
 * (as this function used to) would make the next hook receive a boolean
 * instead of the URL. */
function block_pr0n(uri) {
	if (uri.match(/pr0n/)) {
		elinks.alert('No pr0n!');
		return "";
	}

	return uri;
}
elinks.follow_url_hooks.push(block_pr0n);
do_file(elinks.home + 'smartprefixes_bookmarks.js');
do_file(elinks.home + 'smartprefixes_classic.js');

View File

@ -0,0 +1,35 @@
/* Modern, bookmark-based smartprefixes */
var loaded_smartprefixes_common_code;
if (!loaded_smartprefixes_common_code) {
do_file(elinks.home + "smartprefixes_common.js");
loaded_smartprefixes_common_code = 1;
}
/* Create a top-level folder titled "smartprefixes". In it, add a bookmark
* for each smartprefix, putting the keyword in the title and either a normal
* URI or some JavaScript code prefixed with "javascript:" as the URI. When you
* enter the keyword in the Go to URL box, ELinks will take the URI
* of the corresponding bookmark, replace any occurrence of "%s" with the rest
* of the text entered in the Go to URL box, evaluate the code if the URI
* starts with "javascript:", and go to the resulting URI.
*/
/* goto_url hook implementing bookmark-based smartprefixes.
 *
 * The first space-separated word of the input is looked up among the
 * children of the "smartprefixes" bookmark folder. When there is no such
 * folder or no such bookmark the input is returned unchanged. Otherwise
 * every "%s" in the bookmark's URI is replaced with the rest of the input:
 *  - for a normal URI the text is escaped first;
 *  - for a "javascript:" URI the raw text is inserted, the code is
 *    evaluated, and its value is returned.
 *
 * All occurrences of "%s" are replaced, not only the first. The raw text is
 * inserted through a replacement function so that "$" sequences in what the
 * user typed are inserted literally. */
function rewrite_uri(uri) {
	if (!elinks.bookmarks.smartprefixes) return uri;

	var parts = uri.split(" ");
	var prefix = parts[0];

	if (!elinks.bookmarks.smartprefixes.children[prefix]) return uri;

	var rule = elinks.bookmarks.smartprefixes.children[prefix].url;
	var rest = parts.slice(1).join(" ");

	if (rule.match(/^javascript:/)) {
		/* NOTE(review): the typed text is pasted into the code before it
		 * is evaluated, so it can inject arbitrary script. This is kept
		 * because bookmarks are written to rely on it. */
		var code = rule
			.replace(/^javascript:/, "")
			.replace(/%s/g, function () { return rest; });

		return eval(code);
	}

	var escaped = escape(rest);

	return rule.replace(/%s/g, function () { return escaped; });
}
elinks.goto_url_hooks.push(rewrite_uri);

View File

@ -0,0 +1,102 @@
/* Classic, table-based smartprefixes. */
var loaded_smartprefixes_common_code;
if (!loaded_smartprefixes_common_code) {
do_file(elinks.home + "smartprefixes_common.js");
loaded_smartprefixes_common_code = 1;
}
/* Table of smartprefixes, keyed by the keyword typed in the Go to URL box.
 * A value is either a URI template, in which "%s" stands for the text typed
 * after the keyword, or a function that receives that text and returns the
 * URI to go to.  Every key must appear only once: a repeated key silently
 * replaces the earlier entry. */
var smartprefixes = {
	arc: "http://web.archive.org/web/*/%s",
	binsearch: "http://binsearch.info/?q=%s",
	bug: function (url) { return bugzilla('http://bugzilla.elinks.cz/', url) },
	cambridge: "http://dictionary.cambridge.org/results.asp?searchword=%s",
	cliki: "http://www.cliki.net/admin/search?words=%s",
	// If you want to add a smartprefix for another project's CVSweb,
	// just create a lambda like this. Aren't high-level languages fun?
	cvs: function (x) { return cvsweb ("http://cvsweb.elinks.cz/cvsweb.cgi/", "elinks", x) },
	gitweb: function (x) { return gitweb("http://pasky.or.cz/gitweb.cgi", "elinks.git", x) },
	d: "http://www.dict.org/bin/Dict?Query=%s&Form=Dict1&Strategy=*&Database=*&submit=Submit+query",
	debcontents: debian_contents,
	debfile: debian_file,
	// This entry used to be defined here as well, but it was always
	// overridden by the second 'dix' entry further down, so it never took
	// effect.  Kept for reference; give it a key of its own to enable it.
	//dix: "http://dix.osola.com/?search=%s",
	dmoz: "http://search.dmoz.org/cgi-bin/search?search=%s",
	foldoc: "http://wombat.doc.ic.ac.uk/foldoc/foldoc.cgi?%s",
	g: "http://www.google.com/search?q=%s&btnG=Google+Search",
	gd: "http://www.google.com/search?q=%s&cat=gwd/Top",
	// Whose idea was it to use 'gg' for websearches? -- Miciah
	//gg: "http://groups.google.com/groups?q=%s",
	gi: "http://images.google.com/images?q=%s",
	gmane: gmane,
	gn: "http://news.google.com/news?q=%s",
	go: "http://www.google.com/search?q=%s&btnG=Google+Search",
	gwho: "http://www.googlism.com/?ism=%s&name=1",
	gwhat: "http://www.googlism.com/?ism=%s&name=2",
	gwhere: "http://www.googlism.com/?ism=%s&name=3",
	gwhen: "http://www.googlism.com/?ism=%s&name=4",
	fm: "http://www.freshmeat.net/search/?q=%s",
	savannah: "http://savannah.nongnu.org/search/?words=%s&type_of_search=soft&exact=1",
	sf: "http://sourceforge.net/search/?q=%s",
	sfp: "http://sourceforge.net/projects/%s",
	sd: "http://www.slashdot.org/search.pl?query=%s",
	sdc: "http://www.slashdot.org/search.pl?query=%s&op=comments",
	sdu: "http://www.slashdot.org/search.pl?query=%s&op=users",
	sdp: "http://www.slashdot.org/search.pl?query=%s&op=polls",
	sdj: "http://www.slashdot.org/search.pl?query=%s&op=journals",
	dbug: "http://bugs.debian.org/%s",
	dix: "http://dix.osola.com/index.de.php?trans=1&search=%s",
	dixgram: "http://dix.osola.com/v.php?language=german&search=%s",
	dpkg: "http://packages.debian.org/%s",
	emacs: "http://www.emacswiki.org/cgi-bin/wiki.pl?search=%s",
	lyrics: "http://music.lycos.com/lyrics/results.asp?QT=L&QW=%s",
	lxr: "http://lxr.linux.no/ident?i=%s",
	leo: "http://dict.leo.org/?search=%s",
	nclaw: "http://www.ncleg.net/gascripts/Statutes/StatutesSearch.asp?searchScope=All&searchCriteria=%s&returnType=Section",
	onelook: "http://onelook.com/?w=%s&ls=a",
	py: "http://starship.python.net/crew/theller/pyhelp.cgi?keyword=%s&version=current",
	pydev: "http://starship.python.net/crew/theller/pyhelp.cgi?keyword=%s&version=devel",
	pyvault: "http://py.vaults.ca/apyllo.py?find=%s",
	e2: "http://www.everything2.org/?node=%s",
	encz: "http://www.slovnik.cz/bin/ecd?ecd_il=1&ecd_vcb=%s&ecd_trn=translate&ecd_trn_dir=0&ecd_lines=15&ecd_hptxt=0",
	czen: "http://www.slovnik.cz/bin/ecd?ecd_il=1&ecd_vcb=%s&ecd_trn=translate&ecd_trn_dir=1&ecd_lines=15&ecd_hptxt=0",
	dict: "http://dictionary.reference.com/search?q=%s",
	thes: "http://thesaurus.reference.com/search?q=%s",
	a: "http://acronymfinder.com/af-query.asp?String=exact&Acronym=%s",
	imdb: "http://imdb.com/Find?%s",
	mw: "http://www.m-w.com/cgi-bin/dictionary?book=Dictionary&va=%s",
	mwt: "http://www.m-w.com/cgi-bin/thesaurus?book=Thesaurus&va=%s",
	whatis: "http://uptime.netcraft.com/up/graph/?host=%s",
	wiki: "http://en.wikipedia.org/w/wiki.phtml?search=%s",
	wikide: "http://de.wikipedia.org/w/wiki.phtml?search=%s",
	wn: "http://www.cogsci.princeton.edu/cgi-bin/webwn1.7.1?stage=1&word=%s",
	// rfc by number
	rfc: "http://www.rfc-editor.org/rfc/rfc%s.txt",
	// rfc search
	rfcs: "http://www.rfc-editor.org/cgi-bin/rfcsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	cr: "http://www.rfc-editor.org/cgi-bin/rfcsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	// Internet Draft search
	rfcid: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	urbandict: "http://www.urbandictionary.com/define.php?term=%s",
	id: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
	draft: "http://www.rfc-editor.org/cgi-bin/idsearch.pl?searchwords=%s&format=http&abstract=abson&keywords=keyon&num=25",
};
/* Goto-URL hook for the table-based smartprefixes.
 *
 * The first space-separated word of uri is looked up in the smartprefixes
 * table.  A string rule has its first "%s" replaced with the escape()d rest
 * of the input; a function rule is called with the unescaped rest and its
 * return value is used.
 *
 * uri:     the text typed by the user.
 * returns: the rewritten URI, or uri unchanged when the first word is not a
 *          smartprefix or its rule has an unsupported type (in which case an
 *          alert is shown as well).
 */
function rewrite_uri_classic(uri) {
	var parts = uri.split(" ");
	var prefix = parts[0];
	var rest = parts.slice(1).join(" ");
	var rule;

	/* Only the table's own entries count.  Without this check, words
	 * like "constructor" or "toString" would find the members every
	 * object inherits and be treated as function rules. */
	if (!Object.prototype.hasOwnProperty.call(smartprefixes, prefix))
		return uri;

	rule = smartprefixes[prefix];
	if (!rule) return uri;

	if (typeof(rule) == 'string')
		return rule.replace(/%s/, escape(rest));

	if (typeof(rule) == 'function')
		return rule(rest);

	/* Report the rule's actual type; this used to reference an undefined
	 * variable and threw instead of alerting. */
	elinks.alert('smartprefix[' + prefix + ']'
		     + ' has unsupported type "' + typeof(rule) + '".');
	return uri;
}
/* Register rewrite_uri_classic by appending it to the elinks.goto_url_hooks
 * list. */
elinks.goto_url_hooks.push(rewrite_uri_classic);

View File

@ -0,0 +1,102 @@
/* Common code for smartprefixes_classic.js and smartprefixes_bookmarks.js. */
/* Helper function for debian_contents and debian_file.
 *
 * Builds a packages.debian.org contents-search URL.  Every "key:value" pair
 * found in url is removed from the search word and stored in t, overriding
 * what the caller put there; t is modified in place.  Settings missing from
 * t fall back to: searchmode=searchfilesanddirs, case=insensitive,
 * version=stable, arch=i386.
 *
 * url: the text typed after the smartprefix.
 * t:   object holding the caller's preset search settings. */
function debian_package (url, t)
{
	var remainder = url.replace(/(\w+):(\w+)/g, function (match, name, value) {
		t[name] = value;
		return "";
	});
	var word = escape(remainder.replace(/\s*(\S+)\s*/, '$1'));
	var searchmode = t.searchmode || 'searchfilesanddirs';
	var letter_case = t["case"] || 'insensitive';
	var version = t.version || 'stable';
	var arch = t.arch || 'i386';

	return 'http://packages.debian.org/cgi-bin/search_contents.pl?word=' + word
		+ '&searchmode=' + searchmode
		+ '&case=' + letter_case
		+ '&version=' + version
		+ '&arch=' + arch;
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:debian_contents("%s");
 * Returns the debian_package() URL for url with the search mode preset to
 * "filelist". */
function debian_contents (url)
{
	var presets = { searchmode: "filelist" };

	return debian_package (url, presets);
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:debian_file("%s");
 * Returns the debian_package() URL for url with the search mode preset to
 * "searchfilesanddirs". */
function debian_file (url)
{
	var presets = { searchmode: "searchfilesanddirs" };

	return debian_package (url, presets);
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:cvsweb("http://cvsweb.elinks.cz/cvsweb.cgi/", "elinks", "%s");
 *
 * url has the form <file>:<revision>[-><revision>], or the same fields
 * separated by spaces.  Depending on how many fields are given the result is
 * the diff between two revisions, a checkout of one revision ("latest" gives
 * a checkout without a revision), or the file's CVSweb page.  When no file or
 * a fourth argument is given, an alert is shown and "" is returned. */
function cvsweb (base, project, url)
{
	/* Turn the colon/arrow syntax into space-separated fields. */
	var fields = url.replace(/^(.*):(.*?)(?:->(.*))?$/, "$1 $2 $3").split(" ");

	if (fields[3]) {
		elinks.alert('this smartprefix takes only one to three arguments');
		return "";
	}

	var path = fields[0];
	if (!path) {
		elinks.alert('no file given');
		return "";
	}

	var from = fields[1];
	var to = fields[2];

	if (to) {
		return base + project + "/" + path + ".diff"
			+ "?r1=" + from + "&r2=" + to + "&f=u";
	}

	if (!from)
		return base + project + "/" + path;

	var revision = (from != "latest") ? "?rev=" + from : "";
	return base + "~checkout~/" + project + "/" + path + revision;
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:gitweb("http://pasky.or.cz/gitweb.cgi", "elinks.git", "%s");
 *
 * url is "<action> [<argument>]", where <action> is one of search, summary,
 * shortlog, log, commit, commitdiff, tree or tag.  The argument is passed as
 * the search string for "search" and as the object ID for the other actions.
 * Anything that does not start with exactly one of these actions yields the
 * project's summary page.
 *
 * base:    URL of the gitweb CGI script.
 * project: name of the repository.
 * returns: base with the query string appended. */
function gitweb(base, project, url)
{
	/* The pattern is anchored at both ends so that the whole first word
	 * has to be an action name.  Without the "$", "commitdiff <id>" was
	 * matched as plain "commit" and the ID was dropped. */
	var parts = url.match(/^(search|summary|shortlog|log|commitdiff|commit|tree|tag)(?:\s+(.*))?$/);
	var query = '?p=' + project;

	if (parts) {
		var action = parts[1], arg = parts[2];

		query += ';a=' + action;

		/* If the extra arg is not for searching assume it is an ID. */
		if (action == 'search' && arg)
			query += ';s=' + escape(arg);
		else if (arg)
			query += ';h=' + escape(arg);
	} else {
		query += ';a=summary';
	}

	return base + query;
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:gmane("%s")
 *
 * url is "<group> <words...>": the first space-separated word names the
 * Gmane group, the rest are the search words.
 * Returns the search.gmane.org query URL, or "" when no search words are
 * given. */
function gmane (url)
{
	var v = url.split(' ');
	var group = v[0], words = v.slice(1).join(' ');

	if (!words) return "";

	/* Both values end up in the query string, so they have to be escaped
	 * just like the other smartprefix handlers do; otherwise spaces, "&"
	 * or "#" in the search words corrupt the URL. */
	return "http://search.gmane.org/search.php?query=" + escape(words)
		+ "&group=" + escape(group);
}
/* Smartprefix handler; as a bookmark rule:
 * javascript:bugzilla('http://bugzilla.elinks.cz/', "%s");
 *
 * base_url: URL of the Bugzilla installation, ending in a slash.
 * args:     the text typed after the smartprefix.
 * returns:  base_url itself when args is empty, the page of the bug when
 *           args consists of digits only, and otherwise a search for bugs
 *           whose summary contains all the words in args.
 *
 * The second parameter is deliberately not called "arguments": that name
 * shadows the function's implicit arguments object and is rejected in strict
 * mode. */
function bugzilla (base_url, args)
{
	if (!args) return base_url;

	if (args.match(/^\d+$/))
		return base_url + 'show_bug.cgi?id=' + args;

	return base_url + 'buglist.cgi?short_desc_type=allwordssubstr'
		+ '&short_desc=' + escape(args);
}

View File

@ -38,3 +38,12 @@ h1 {
/* Further, give it a pretty colour: */
color: lightgoldenrod !important;
}
/* The following specifies colors used in the HTML highlighting. */
document { color: yellow }
element { color: lightgreen }
entity-reference { color: red }
proc-instruction { color: red }
attribute { color: magenta }
comment { color: aqua }
cdata-section { color: orange2 }

View File

@ -32,7 +32,7 @@ different config format, I work on this.]
You can find the snapshot Debian packages at
ftp://yikes.tolna.net/pub/linux/release/debian/
Homepage is http://elinks.or.cz/
Homepage is http://elinks.cz/
-- Peter Gervai <grin@tolna.net>, Thu, 9 Oct 2003 01:17:26 +0200
@ -46,7 +46,7 @@ A: Please check the menus. Download manager is under Tools>Download, proxy
(with contribution by Jonas Fonseca)
Q: The "move" doesn't work in the bookmark manager.
A: Read <http://elinks.or.cz/documentation/bookmarks.txt>
A: Read <http://elinks.cz/documentation/html/manual.html-chunked/ch04.html>
to learn how to use this unintuitive feature.
(contributed by Miciah Dashiel Butler Masters)

2
debian/control.in vendored
View File

@ -17,4 +17,4 @@ Description: advanced text-mode WWW browser
language, IPV6 and has many other features. ELinks is linked against
GNUTLS to prevent license issues.
.
Homepage: http://elinks.or.cz/
Homepage: http://elinks.cz/

View File

@ -130,7 +130,7 @@ doc-dirs:
# $(MAN_DIR) intentionally left out
clean-local:
$(RM) -r $(HTML_DIR) $(XML_DIR) $(TXT_DIR) $(PDF_DIR) *.tmp
@$(RM) -r $(HTML_DIR) $(XML_DIR) $(TXT_DIR) $(PDF_DIR) *.tmp
# Autogenerated asciidoc files.

View File

@ -217,17 +217,17 @@ Triggered When:
Arguments:
unsigned char **html, int *html_len, struct session *ses, unsigned char *url
struct session *ses
struct cache_entry *cached
Description:
Makes it possible to fix up bad HTML code, remove tags etc. The HTML source
is changed by making @html point to the new source. If @html is changed the
event propagation should be ended and @html_len should be updated to the new
length of the document content.
Possible values for @html includes:
- new document content in a dynamically allocated string; or
- NULL to keep the content unchanged.
Makes it possible to fix up bad HTML code, remove tags etc. The parameter
cached is guaranteed to have a single fragment. The HTML source is changed
by replacing this fragment:
add_fragment(cached, 0, new_string, new_len);
normalize_cache_entry(cached, new_len);
-------------------------------------------------------------------------------
Name: quit

View File

@ -1,7 +1,7 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
elinks.1: $(srcdir)/elinks.1.in $(top_srcdir)/configure.in
elinks.1: $(srcdir)elinks.1.in $(top_srcdir)/configure.in
cd $(top_builddir) && \
CONFIG_FILES="$(RELPATH)$@" CONFIG_HEADERS= $(SHELL) ./config.status

View File

@ -1,6 +1,6 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
MAN5 = $(srcdir)/elinks.conf.5 $(srcdir)/elinkskeys.5
MAN5 = $(srcdir)elinks.conf.5 $(srcdir)elinkskeys.5
include $(top_srcdir)/Makefile.lib

View File

@ -395,6 +395,22 @@ CONFIG_CSS=yes
CONFIG_HTML_HIGHLIGHT=no
### ECMAScript (JavaScript) Browser Scripting
#
# By enabling this feature, certain parts of ELinks, such as the goto URL
# dialog, may be extended using ECMAScript (aka. JavaScript) scripts. This can
# be useful to optimise your usage of ELinks.
#
# For example you can define shortcuts (or abbreviations) for URLs of sites you
# often visit by having a goto URL hook expand them. This can also be achieved
# with the URI rewrite feature (CONFIG_URI_REWRITE), however it is not as
# powerful as doing it with scripting.
#
# Default: enabled if Spidermonkey is found
CONFIG_SM_SCRIPTING=yes
### Mouse Support
#
# ELinks may be controlled not only by keyboard, but also by mouse to quite some

1
po/.gitignore vendored
View File

@ -1,2 +1,3 @@
*.gmo
elinks.pot
potfiles.list

View File

@ -11,15 +11,16 @@ MSGMERGE = msgmerge
# xgettext)
POTFILES_ABS_LIST = potfiles.list
quiet_cmd_gmsgfmt = ' [$(PO_COLOR)GMSGFMT$(END_COLOR)] $(RELPATH)$(@)'
cmd_gmsgfmt = rm -f -- "$@" && $(GMSGFMT) --statistics -o "$@" -- "$<"
# Distributed elinks-*.tar.gz packages include po/*.gmo files, so that
# users can install ELinks even if they don't have a msgfmt program.
# However, if srcdir != builddir, then this Makefile ignores the *.gmo
# files in the srcdir and builds new ones in the builddir.
%.gmo: $(srcdir)/%.po
@file=`echo $* | sed 's,.*/,,'`.gmo \
&& rm -f $$file && echo -n $*": " \
&& $(GMSGFMT) --statistics -o $$file $<
%.gmo: $(srcdir)%.po
$(call ecmd,gmsgfmt)
### The default rule
@ -89,7 +90,7 @@ update-gmo: Makefile $(GMOFILES)
check-po:
@$(foreach lang,$(basename $(if $(strip $(PO)),$(PO),$(GMOFILES))), \
echo -n "$(lang): "; \
$(GMSGFMT) --check --check-accelerators=~ --verbose --statistics -o /dev/null $(srcdir)/$(lang).po; \
$(GMSGFMT) --check --check-accelerators="~" --verbose --statistics -o /dev/null $(srcdir)/$(lang).po; \
$(srcdir)/check-accelerator-contexts.pl $(srcdir)/$(lang).po \
)
@ -105,6 +106,6 @@ install-local: all-local
)
clean-local:
rm -f $(PACKAGE).po *.new.po $(srcdir)/$(POTFILES_ABS_LIST)
@rm -f $(PACKAGE).po *.new.po $(srcdir)/$(POTFILES_ABS_LIST)
include $(top_srcdir)/Makefile.lib

View File

@ -78,7 +78,7 @@ name of your language (in English) so you end up with something like:
------------------------------------------------
Finally to make it part of the ELinks distribution send it to one of the
mailinglists or file it as a bug at <http://bugzilla.elinks.or.cz>.
mailinglists or file it as a bug at <http://bugzilla.elinks.cz>.
2. Updating .po files:

405
po/fr.po

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@ SUBDIRS-$(CONFIG_FORMHIST) += formhist
SUBDIRS-$(CONFIG_GLOBHIST) += globhist
SUBDIRS-$(CONFIG_ECMASCRIPT) += ecmascript
SUBDIRS-$(CONFIG_SCRIPTING) += scripting
SUBDIRS-$(CONFIG_DOM) += dom
SUBDIRS = \
bfu \

View File

@ -92,33 +92,33 @@ replace_listbox_item(struct listbox_item *item, struct listbox_data *data)
}
void
done_listbox_item(struct hierbox_browser *browser, struct listbox_item *box_item)
done_listbox_item(struct hierbox_browser *browser, struct listbox_item *item)
{
struct listbox_data *box_data;
assert(box_item && list_empty(box_item->child));
assert(item && list_empty(item->child));
/* The option dialog needs this test */
if (box_item->next) {
if (item->next) {
/* If we are removing the top or the selected box
* we have to figure out a replacement. */
foreach (box_data, browser->boxes) {
if (box_data->sel == box_item)
box_data->sel = replace_listbox_item(box_item,
if (box_data->sel == item)
box_data->sel = replace_listbox_item(item,
box_data);
if (box_data->top == box_item)
box_data->top = replace_listbox_item(box_item,
if (box_data->top == item)
box_data->top = replace_listbox_item(item,
box_data);
}
del_from_list(box_item);
del_from_list(item);
update_hierbox_browser(browser);
}
mem_free(box_item);
mem_free(item);
}
@ -186,10 +186,10 @@ hierbox_ev_kbd(struct dialog_data *dlg_data)
} else if (action_id == ACT_MENU_EXPAND) {
/* Recursively expand all folders */
if (!selected || box->sel->type != BI_FOLDER)
if (!selected || selected->type != BI_FOLDER)
return EVENT_PROCESSED;
recursively_set_expanded(box->sel, 1);
recursively_set_expanded(selected, 1);
} else if (action_id == ACT_MENU_SEARCH) {
if (!box->ops->match)
@ -407,28 +407,29 @@ push_hierbox_info_button(struct dialog_data *dlg_data, struct widget_data *butto
{
/* [gettext_accelerator_context(push_hierbox_info_button)] */
struct listbox_data *box = get_dlg_listbox_data(dlg_data);
struct listbox_item *item = box->sel;
struct terminal *term = dlg_data->win->term;
struct listbox_context *context;
unsigned char *msg;
if (!box->sel) return EVENT_PROCESSED;
if (!item) return EVENT_PROCESSED;
assert(box->ops);
context = init_listbox_context(box, term, box->sel, NULL);
context = init_listbox_context(box, term, item, NULL);
if (!context) return EVENT_PROCESSED;
msg = box->ops->get_info(context->item, term);
msg = box->ops->get_info(item, term);
if (!msg) {
mem_free(context);
if (box->sel->type == BI_FOLDER) {
if (item->type == BI_FOLDER) {
info_box(term, 0, N_("Info"), ALIGN_CENTER,
N_("Press space to expand this folder."));
}
return EVENT_PROCESSED;
}
box->ops->lock(context->item);
box->ops->lock(item);
msg_box(term, getml(context, NULL), MSGBOX_FREE_TEXT /* | MSGBOX_SCROLLABLE */,
N_("Info"), ALIGN_LEFT,
@ -442,25 +443,36 @@ push_hierbox_info_button(struct dialog_data *dlg_data, struct widget_data *butto
/* Goto action */
static void recursively_goto_each_listbox(struct session *ses,
struct listbox_item *root,
struct listbox_data *box);
static void
recursively_goto_listbox(struct session *ses, struct listbox_item *root,
recursively_goto_listbox(struct session *ses, struct listbox_item *item,
struct listbox_data *box)
{
if (item->type == BI_FOLDER) {
recursively_goto_each_listbox(ses, item, box);
return;
} else if (item->type == BI_LEAF) {
struct uri *uri = box->ops->get_uri(item);
if (!uri) return;
open_uri_in_new_tab(ses, uri, 1, 0);
done_uri(uri);
}
}
static void
recursively_goto_each_listbox(struct session *ses, struct listbox_item *root,
struct listbox_data *box)
{
struct listbox_item *item;
foreach (item, root->child) {
if (item->type == BI_FOLDER) {
recursively_goto_listbox(ses, item, box);
continue;
} else if (item->type == BI_LEAF) {
struct uri *uri = box->ops->get_uri(item);
if (!uri) continue;
open_uri_in_new_tab(ses, uri, 1, 0);
done_uri(uri);
}
recursively_goto_listbox(ses, item, box);
}
}
@ -473,18 +485,7 @@ goto_marked(struct listbox_item *item, void *data_, int *offset)
struct session *ses = context->dlg_data->dlg->udata;
struct listbox_data *box = context->box;
if (item->type == BI_FOLDER) {
recursively_goto_listbox(ses, item, box);
return 0;
} else if (item->type == BI_LEAF) {
struct uri *uri = box->ops->get_uri(item);
if (!uri) return 0;
open_uri_in_new_tab(ses, uri, 1, 0);
done_uri(uri);
}
recursively_goto_listbox(ses, item, box);
}
return 0;
@ -495,14 +496,14 @@ push_hierbox_goto_button(struct dialog_data *dlg_data,
struct widget_data *button)
{
struct listbox_data *box = get_dlg_listbox_data(dlg_data);
struct listbox_item *item = box->sel;
struct session *ses = dlg_data->dlg->udata;
struct terminal *term = dlg_data->win->term;
struct listbox_context *context;
/* Do nothing with a folder */
if (!box->sel) return EVENT_PROCESSED;
if (!item) return EVENT_PROCESSED;
context = init_listbox_context(box, term, box->sel, scan_for_marks);
context = init_listbox_context(box, term, item, scan_for_marks);
if (!context) return EVENT_PROCESSED;
if (!context->item) {
@ -511,11 +512,11 @@ push_hierbox_goto_button(struct dialog_data *dlg_data,
context->box, 0, 0,
goto_marked, context);
} else if (box->sel->type == BI_FOLDER) {
recursively_goto_listbox(ses, box->sel, box);
} else if (item->type == BI_FOLDER) {
recursively_goto_each_listbox(ses, item, box);
} else if (box->sel->type == BI_LEAF) {
struct uri *uri = box->ops->get_uri(box->sel);
} else if (item->type == BI_LEAF) {
struct uri *uri = box->ops->get_uri(item);
if (uri) {
goto_uri(ses, uri);
@ -543,17 +544,49 @@ enum delete_error {
DELETE_ERRORS,
};
unsigned char *delete_messages[2][DELETE_ERRORS] = {
{
N_("Sorry, but the item \"%s\" cannot be deleted."),
N_("Sorry, but the item \"%s\" is being used by something else."),
},
{
N_("Sorry, but the folder \"%s\" cannot be deleted."),
N_("Sorry, but the folder \"%s\" is being used by something else."),
},
struct listbox_ops_messages default_listbox_ops_messages = {
/* cant_delete_item */
N_("Sorry, but the item \"%s\" cannot be deleted."),
/* cant_delete_used_item */
N_("Sorry, but the item \"%s\" is being used by something else."),
/* cant_delete_folder */
N_("Sorry, but the folder \"%s\" cannot be deleted."),
/* cant_delete_used_folder */
N_("Sorry, but the folder \"%s\" is being used by something else."),
/* delete_marked_items_title */
N_("Delete marked items"),
/* delete_marked_items */
N_("Delete marked items?"),
/* delete_folder_title */
N_("Delete folder"),
/* delete_folder */
N_("Delete the folder \"%s\" and its content?"),
/* delete_item_title */
N_("Delete item"),
/* delete_item */
N_("Delete \"%s\"?\n\n%s"),
/* clear_all_items_title */
N_("Clear all items"),
/* clear_all_items */
N_("Do you really want to remove all items?"),
};
/* Pick the text for message field @msg: the override from ops->messages if
 * the listbox ops provide one, else the entry from
 * default_listbox_ops_messages.  Expects a variable named `ops' (a struct
 * listbox_ops pointer) to be in scope where the macro is used.  The expansion
 * is wrapped in parentheses so the conditional stays intact inside larger
 * expressions. */
#define listbox_message(msg) \
	(ops->messages && ops->messages->msg \
	 ? ops->messages->msg \
	 : default_listbox_ops_messages.msg)
static void
print_delete_error(struct listbox_item *item, struct terminal *term,
struct listbox_ops *ops, enum delete_error err)
@ -565,29 +598,17 @@ print_delete_error(struct listbox_item *item, struct terminal *term,
switch (err) {
case DELETE_IMPOSSIBLE:
if (item->type == BI_FOLDER) {
if (ops->messages && ops->messages->cant_delete_folder)
errmsg = ops->messages->cant_delete_folder;
else
errmsg = delete_messages[1][DELETE_IMPOSSIBLE];
errmsg = listbox_message(cant_delete_folder);
} else {
if (ops->messages && ops->messages->cant_delete_item)
errmsg = ops->messages->cant_delete_item;
else
errmsg = delete_messages[0][DELETE_IMPOSSIBLE];
errmsg = listbox_message(cant_delete_item);
}
break;
case DELETE_LOCKED:
if (item->type == BI_FOLDER) {
if (ops->messages && ops->messages->cant_delete_used_folder)
errmsg = ops->messages->cant_delete_used_folder;
else
errmsg = delete_messages[1][DELETE_LOCKED];
errmsg = listbox_message(cant_delete_used_folder);
} else {
if (ops->messages && ops->messages->cant_delete_used_item)
errmsg = ops->messages->cant_delete_used_item;
else
errmsg = delete_messages[0][DELETE_LOCKED];
errmsg = listbox_message(cant_delete_used_item);
}
break;
@ -696,29 +717,24 @@ push_hierbox_delete_button(struct dialog_data *dlg_data,
/* [gettext_accelerator_context(push_hierbox_delete_button)] */
struct terminal *term = dlg_data->win->term;
struct listbox_data *box = get_dlg_listbox_data(dlg_data);
struct listbox_ops *ops = box->ops;
struct listbox_item *item = box->sel;
struct listbox_context *context;
unsigned char *text;
enum delete_error delete;
if (!box->sel) return EVENT_PROCESSED;
if (!item) return EVENT_PROCESSED;
assert(box->ops && box->ops->can_delete && box->ops->delete);
assert(ops && ops->can_delete && ops->delete);
context = init_listbox_context(box, term, box->sel, scan_for_marks);
context = init_listbox_context(box, term, item, scan_for_marks);
if (!context) return EVENT_PROCESSED;
context->widget_data = dlg_data->widgets_data;
if (!context->item) {
unsigned char *title = N_("Delete marked items");
unsigned char *message = N_("Delete marked items?");
if (box->ops->messages) {
if (box->ops->messages->delete_marked_items)
message = box->ops->messages->delete_marked_items;
if (box->ops->messages->delete_marked_items_title)
title = box->ops->messages->delete_marked_items_title;
}
unsigned char *title = listbox_message(delete_marked_items_title);
unsigned char *message = listbox_message(delete_marked_items);
msg_box(term, getml(context, NULL), 0,
title, ALIGN_CENTER,
@ -729,33 +745,26 @@ push_hierbox_delete_button(struct dialog_data *dlg_data,
return EVENT_PROCESSED;
}
delete = box->ops->can_delete(context->item)
delete = ops->can_delete(context->item)
? DELETE_LOCKED : DELETE_IMPOSSIBLE;
if (delete == DELETE_IMPOSSIBLE || box->ops->is_used(context->item)) {
print_delete_error(context->item, term, box->ops, delete);
if (delete == DELETE_IMPOSSIBLE || ops->is_used(context->item)) {
print_delete_error(context->item, term, ops, delete);
mem_free(context);
return EVENT_PROCESSED;
}
text = box->ops->get_text(context->item, term);
text = ops->get_text(context->item, term);
if (!text) {
mem_free(context);
return EVENT_PROCESSED;
}
if (context->item->type == BI_FOLDER) {
unsigned char *title = N_("Delete folder");
unsigned char *message = N_("Delete the folder \"%s\" and its content?");
unsigned char *title = listbox_message(delete_folder_title);
unsigned char *message = listbox_message(delete_folder);
if (box->ops->messages) {
if (box->ops->messages->delete_folder)
message = box->ops->messages->delete_folder;
if (box->ops->messages->delete_folder_title)
title = box->ops->messages->delete_folder_title;
}
box->ops->lock(context->item);
ops->lock(context->item);
msg_box(term, getml(context, NULL), MSGBOX_FREE_TEXT,
title, ALIGN_CENTER,
msg_text(term, message, text),
@ -763,19 +772,11 @@ push_hierbox_delete_button(struct dialog_data *dlg_data,
N_("~Yes"), push_ok_delete_button, B_ENTER,
N_("~No"), done_listbox_context, B_ESC);
} else {
unsigned char *title = N_("Delete item");
unsigned char *message = N_("Delete \"%s\"?\n\n%s");
unsigned char *msg;
unsigned char *title = listbox_message(delete_item_title);
unsigned char *message = listbox_message(delete_item);
unsigned char *msg = ops->get_info(context->item, term);
if (box->ops->messages) {
if (box->ops->messages->delete_item)
message = box->ops->messages->delete_item;
if (box->ops->messages->delete_item_title)
title = box->ops->messages->delete_item_title;
}
msg = box->ops->get_info(context->item, term);
box->ops->lock(context->item);
ops->lock(context->item);
msg_box(term, getml(context, NULL), MSGBOX_FREE_TEXT,
title, ALIGN_LEFT,
@ -820,14 +821,15 @@ push_hierbox_clear_button(struct dialog_data *dlg_data,
{
/* [gettext_accelerator_context(push_hierbox_clear_button)] */
struct listbox_data *box = get_dlg_listbox_data(dlg_data);
struct listbox_ops *ops = box->ops;
struct terminal *term = dlg_data->win->term;
struct listbox_context *context;
unsigned char *title = N_("Clear all items");
unsigned char *message = N_("Do you really want to remove all items?");
unsigned char *title = listbox_message(clear_all_items_title);
unsigned char *message = listbox_message(clear_all_items);
if (!box->sel) return EVENT_PROCESSED;
assert(box->ops);
assert(ops);
context = init_listbox_context(box, term, NULL, scan_for_used);
if (!context) return EVENT_PROCESSED;
@ -837,18 +839,11 @@ push_hierbox_clear_button(struct dialog_data *dlg_data,
* not all items can be deleted scan_for_used() should also scan
* for undeletable and we should be able to pass either delete
* error types. */
print_delete_error(context->item, term, box->ops, DELETE_LOCKED);
print_delete_error(context->item, term, ops, DELETE_LOCKED);
mem_free(context);
return EVENT_PROCESSED;
}
if (box->ops->messages) {
if (box->ops->messages->clear_all_items)
message = box->ops->messages->clear_all_items;
if (box->ops->messages->clear_all_items_title)
title = box->ops->messages->clear_all_items_title;
}
msg_box(term, getml(context, NULL), 0,
title, ALIGN_CENTER,
message,
@ -859,6 +854,8 @@ push_hierbox_clear_button(struct dialog_data *dlg_data,
return EVENT_PROCESSED;
}
#undef listbox_message
/* Search action */

View File

@ -50,7 +50,7 @@ struct hierbox_browser {
ops, \
}
void done_listbox_item(struct hierbox_browser *browser, struct listbox_item *box_item);
void done_listbox_item(struct hierbox_browser *browser, struct listbox_item *item);
void update_hierbox_browser(struct hierbox_browser *browser);
struct listbox_item *

View File

@ -86,6 +86,19 @@ do_tab_compl(struct dialog_data *dlg_data, struct list_head *history)
}
}
/* Count how many leading characters the NUL-terminated strings @a and @b
 * have in common.  Counting stops at the first position where they differ
 * or where @a ends. */
static inline int
strcommonlen(unsigned char *a, unsigned char *b)
{
	int len = 0;

	while (a[len] != '\0' && a[len] == b[len])
		len++;

	return len;
}
/* Complete to the last unambiguous character. Eg., I've been to google.com,
* google.com/search?q=foo, and google.com/search?q=bar. This function then
* completes `go' to `google.com' and `google.com/' to `google.com/search?q='.
@ -96,41 +109,32 @@ do_tab_compl_unambiguous(struct dialog_data *dlg_data, struct list_head *history
struct widget_data *widget_data = selected_widget(dlg_data);
int base_len = strlen(widget_data->cdata);
/* Maximum number of characters in a match. Characters after this
* position are varying in other matches. Zero means that no max has
* been set yet. */
* position are varying in other matches. */
int longest_common_match = 0;
unsigned char *match = NULL;
struct input_history_entry *entry;
foreach (entry, *history) {
unsigned char *cur = entry->data;
unsigned char *matchpos = match ? match : widget_data->cdata;
int cur_len = 0;
for (; *cur && *cur == *matchpos; ++cur, ++matchpos) {
++cur_len;
/* XXX: I think that unifying the two cases of this
* test could seriously hurt readability. --pasky */
if (longest_common_match
&& cur_len >= longest_common_match)
break;
}
int cur_len = strcommonlen(cur, match ? match
: widget_data->cdata);
/* Throw it away if it isn't even as long as what the user
* entered. */
if (cur_len < base_len)
continue;
if (!match) cur_len = strlen(entry->data);
/* By now, @cur_len oscillates between @base_len and
* @longest_common_match. */
if (longest_common_match
&& cur_len >= longest_common_match)
continue;
/* We found the next shortest common match. */
longest_common_match = cur_len;
match = entry->data;
if (!match) {
/* This is the first match, so its length is the maximum
* for any future matches. */
longest_common_match = strlen(entry->data);
match = entry->data;
} else if (cur_len < longest_common_match) {
/* The current match has a shorter substring in common
* with the previous candidates, so the common substring
* shrinks. */
longest_common_match = cur_len;
}
}
if (!match) return;

View File

@ -354,18 +354,22 @@ static int
display_listbox_item(struct listbox_item *item, void *data_, int *offset)
{
struct listbox_context *data = data_;
unsigned char *stylename;
int len; /* Length of the current text field. */
struct color_pair *color;
struct color_pair *tree_color, *text_color;
int depth = item->depth + 1;
int d;
int x, y;
stylename = (item == data->box->sel) ? "menu.selected"
: ((item->marked) ? "menu.marked"
: "menu.normal");
tree_color = get_bfu_color(data->term, "menu.normal");
if (item == data->box->sel) {
text_color = get_bfu_color(data->term, "menu.selected");
color = get_bfu_color(data->term, stylename);
} else if (item->marked) {
text_color = get_bfu_color(data->term, "menu.marked");
} else {
text_color = tree_color;
}
y = data->widget_data->box.y + data->offset;
for (d = 0; d < depth - 1; d++) {
@ -380,13 +384,13 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset)
/* XXX */
x = data->widget_data->box.x + d * 5;
draw_text(data->term, x, y, " ", 5, 0, color);
draw_text(data->term, x, y, " ", 5, 0, tree_color);
if (root ? root->child.prev == child
: data->box->items->prev == child)
continue; /* We were the last branch. */
draw_border_char(data->term, x + 1, y, BORDER_SVLINE, color);
draw_border_char(data->term, x + 1, y, BORDER_SVLINE, tree_color);
}
if (depth) {
@ -429,7 +433,7 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset)
x = data->widget_data->box.x + (depth - 1) * 5;
for (i = 0; i < 5; i++) {
draw_border_char(data->term, x + i, y, str[i], color);
draw_border_char(data->term, x + i, y, str[i], tree_color);
}
}
@ -440,7 +444,7 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset)
int width = data->widget_data->box.width - depth * 5;
for (i = 0; i < width; i++) {
draw_border_char(data->term, x + i, y, BORDER_SHLINE, color);
draw_border_char(data->term, x + i, y, BORDER_SHLINE, text_color);
}
} else if (data->box->ops && data->box->ops->draw) {
@ -460,15 +464,14 @@ display_listbox_item(struct listbox_item *item, void *data_, int *offset)
len = strlen(text);
int_upper_bound(&len, int_max(0, data->widget_data->box.width - depth * 5));
draw_text(data->term, x, y, text, len, 0, color);
draw_text(data->term, x, y, text, len, 0, text_color);
mem_free(text);
}
if (item == data->box->sel) {
x = data->widget_data->box.x;
/* For blind users: */
x = data->widget_data->box.x + 5 + item->depth * 5;
set_cursor(data->term, x, y, 1);
set_window_ptr(data->dlg_data->win, x, y);
}
@ -605,106 +608,117 @@ mouse_listbox(struct dialog_data *dlg_data, struct widget_data *widget_data)
}
static widget_handler_status_T
kbd_listbox(struct dialog_data *dlg_data, struct widget_data *widget_data)
do_kbd_listbox_action(enum menu_action action_id, struct dialog_data *dlg_data,
		      struct widget_data *widget_data)
{
	/* Perform one menu-keymap action on the listbox of a dialog.
	 *
	 * Every action operates on the dialog's first widget
	 * (dlg_data->widgets_data); @widget_data is only forwarded to the
	 * delete button handler.
	 *
	 * Returns EVENT_PROCESSED when @action_id was handled here and
	 * EVENT_NOT_PROCESSED for any other action. */
	struct widget_data *listbox = dlg_data->widgets_data;
	struct listbox_data *box;
	int distance;

	/* Deleting does not move the selection or redraw the widget. */
	if (action_id == ACT_MENU_DELETE) {
		box = get_listbox_widget_data(listbox);

		if (box->ops && box->ops->delete && box->ops->can_delete)
			push_hierbox_delete_button(dlg_data, widget_data);

		return EVENT_PROCESSED;
	}

	/* Marking toggles the flag on the selected item and steps to the
	 * next one; the widget is redrawn even when nothing is selected. */
	if (action_id == ACT_MENU_MARK_ITEM) {
		box = get_listbox_widget_data(listbox);

		if (box->sel) {
			box->sel->marked = !box->sel->marked;
			listbox_sel_move(listbox, 1);
		}

		display_widget(dlg_data, listbox);
		return EVENT_PROCESSED;
	}

	/* The remaining actions are plain selection moves which only differ
	 * in how far the selection travels. */
	switch (action_id) {
	case ACT_MENU_DOWN:
		distance = 1;
		break;

	case ACT_MENU_UP:
		distance = -1;
		break;

	case ACT_MENU_PAGE_DOWN:
		box = get_listbox_widget_data(listbox);
		distance = 2 * listbox->box.height
			   - box->sel_offset - 1;
		break;

	case ACT_MENU_PAGE_UP:
		box = get_listbox_widget_data(listbox);
		distance = -listbox->box.height
			   - box->sel_offset;
		break;

	case ACT_MENU_HOME:
		distance = -INT_MAX;
		break;

	case ACT_MENU_END:
		distance = INT_MAX;
		break;

	default:
		return EVENT_NOT_PROCESSED;
	}

	listbox_sel_move(listbox, distance);
	display_widget(dlg_data, listbox);

	return EVENT_PROCESSED;
}
static widget_handler_status_T
kbd_listbox(struct dialog_data *dlg_data, struct widget_data *widget_data)
{
struct term_event *ev = dlg_data->term_event;
/* Not a pure listbox, but you're not supposed to use this outside of
* the listbox browser anyway, so what.. */
switch (ev->ev) {
enum menu_action action_id;
case EVENT_KBD:
{
enum menu_action action_id;
action_id = kbd_action(KEYMAP_MENU, ev, NULL);
/* Moving the box */
if (action_id == ACT_MENU_DOWN) {
listbox_sel_move(dlg_item, 1);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_UP) {
listbox_sel_move(dlg_item, -1);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_PAGE_DOWN) {
struct listbox_data *box;
box = get_listbox_widget_data(dlg_item);
listbox_sel_move(dlg_item,
2 * dlg_item->box.height
- box->sel_offset - 1);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_PAGE_UP) {
struct listbox_data *box;
box = get_listbox_widget_data(dlg_item);
listbox_sel_move(dlg_item,
-dlg_item->box.height
- box->sel_offset);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_HOME) {
listbox_sel_move(dlg_item, -INT_MAX);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_END) {
listbox_sel_move(dlg_item, INT_MAX);
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_MARK_ITEM) {
struct listbox_data *box;
box = get_listbox_widget_data(dlg_item);
if (box->sel) {
box->sel->marked = !box->sel->marked;
listbox_sel_move(dlg_item, 1);
}
display_widget(dlg_data, dlg_item);
return EVENT_PROCESSED;
}
if (action_id == ACT_MENU_DELETE) {
struct listbox_data *box;
box = get_listbox_widget_data(dlg_item);
if (box->ops
&& box->ops->delete
&& box->ops->can_delete)
push_hierbox_delete_button(dlg_data,
widget_data);
return EVENT_PROCESSED;
}
/* Selecting a button; most probably ;). */
break;
return do_kbd_listbox_action(action_id, dlg_data, widget_data);
}
case EVENT_INIT:
case EVENT_RESIZE:
case EVENT_REDRAW:

View File

@ -254,80 +254,57 @@ count_menu_size(struct terminal *term, struct menu *menu)
int_bounds(&menu->box.y, 0, height - my);
}
/* Starting at index @pos, walk the menu items in direction @dir (1 or -1)
 * until a selectable item is found.
 * Returns the index of that item, or -1 when the end of the menu is reached
 * in the given direction without finding one (or when the arguments fail
 * the sanity check). */
static int
search_selectable(struct menu *menu, int pos, int dir)
{
	assert(pos >= 0 && pos < menu->size && (dir == 1 || dir == -1));
	if_assert_failed return -1;

	for (; !mi_is_selectable(&menu->items[pos]); pos += dir) {
		int at_bottom = (dir > 0 && pos == menu->size - 1);
		int at_top = (dir < 0 && pos == 0);

		/* Nowhere left to go in this direction. */
		if (at_bottom || at_top)
			return -1;
	}

	return pos;
}
static void
scroll_menu(struct menu *menu, int steps, int wrap)
{
int height, scr_i, pos;
int height, scr_i, pos, start;
int s = steps ? steps/abs(steps) : 1; /* Selectable item search direction. */
if (menu->size <= 0) {
no_item:
/* Menu is empty. */
menu->selected = -1;
menu->first = 0;
return;
}
/* Move by the required steps and handle wraparound if needed.
* A step of zero can be used, indicating we want to select the
* item corresponding to the |menu->selected| value rather than
* moving to a position relative to this value.
* We override the search direction for selectable items if we encounter
* a limit, since it depends on the conditions under which this limit is
* reached. */
menu->selected += steps;
if (menu->selected >= menu->size) {
if (wrap) {
menu->selected = 0;
s = 1;
} else {
menu->selected = int_max(0, menu->size - 1);
s = -1;
}
} else if (menu->selected < 0) {
if (wrap) {
menu->selected = int_max(0, menu->size - 1);
s = -1;
} else {
menu->selected = 0;
s = 1;
start = pos = menu->selected;
if (!steps) steps = 1, --pos;
while (steps) {
pos += s, steps -= s;
while (1) {
if (start == pos) {
goto select_item;
} else if (pos >= menu->size && s == 1) {
if (wrap) {
pos = 0;
} else {
pos = menu->size - 1;
goto select_item;
}
} else if (pos < 0 && s == -1) {
if (wrap) {
pos = menu->size - 1;
} else {
pos = 0;
goto select_item;
}
} else if (!mi_is_selectable(&menu->items[pos])) {
pos += s;
} else {
break;
}
if (start == -1) start = 0;
}
}
/* Current selected item may be an unselectable item, so we need to
* find first selectable item near to it.
* @s = 1 : ascending search.
* @s = -1: descending search. */
/* Search first selectable item in one direction. */
pos = search_selectable(menu, menu->selected, s);
if (pos == -1) {
/* If not found, invert the search direction and try again. */
pos = search_selectable(menu, menu->selected, -s);
}
/* No selectable item found, just return. */
if (pos == -1) {
menu->selected = -1;
menu->first = 0;
}
select_item:
if (!mi_is_selectable(&menu->items[pos]))
goto no_item;
menu->selected = pos;

View File

@ -1,6 +1,7 @@
#ifndef EL__BFU_MSGBOX_H
#define EL__BFU_MSGBOX_H
#include "util/align.h"
#include "util/memlist.h"
struct terminal;

View File

@ -39,6 +39,8 @@ static struct bookmarks_backend *bookmarks_backends[] = {
};
static int loaded_backend_num = -1;
/* Loads the bookmarks from file */
void
bookmarks_read(void)
@ -67,6 +69,7 @@ bookmarks_read(void)
fclose(f);
bookmarks_unset_dirty();
loaded_backend_num = backend_num;
}
void
@ -77,7 +80,7 @@ bookmarks_write(struct list_head *bookmarks_list)
struct secure_save_info *ssi;
unsigned char *file_name;
if (!bookmarks_are_dirty()) return;
if (!bookmarks_are_dirty() && backend_num == loaded_backend_num) return;
if (!backend
|| !backend->write
|| !elinks_home

View File

@ -366,7 +366,6 @@ add_bookmark(struct bookmark *root, int place, unsigned char *title,
type = BI_FOLDER;
}
/* Setup box_item */
bm->box_item = add_listbox_item(&bookmark_browser,
root ? root->box_item : NULL,
type,
@ -443,6 +442,22 @@ update_bookmark(struct bookmark *bm, unsigned char *title,
return 1;
}
/* Search for a bookmark with the given title. Search in the given folder
* or in the root if folder is NULL. */
struct bookmark *
get_bookmark_by_name(struct bookmark *folder, unsigned char *title)
{
struct bookmark *bookmark;
struct list_head *lh;
lh = folder ? &folder->child : &bookmarks;
foreach (bookmark, *lh)
if (!strcmp(bookmark->title, title)) return bookmark;
return NULL;
}
/* Search bookmark cache for item matching url. */
struct bookmark *
get_bookmark(unsigned char *url)

View File

@ -43,6 +43,8 @@ int bookmarks_are_dirty(void);
void delete_bookmark(struct bookmark *);
struct bookmark *add_bookmark(struct bookmark *, int, unsigned char *, unsigned char *);
struct bookmark *get_bookmark_by_name(struct bookmark *folder,
unsigned char *title);
struct bookmark *get_bookmark(unsigned char *url);
void bookmark_terminal_tabs(struct terminal *term, unsigned char *foldername);
void bookmark_auto_save_tabs(struct terminal *term);

View File

@ -46,6 +46,7 @@ ACTION_(MAIN, "link-external-command", LINK_EXTERNAL_COMMAND, N__("Pass URI of c
ACTION_(MAIN, "link-follow", LINK_FOLLOW, N__("Follow the current link"), ACTION_REQUIRE_VIEW_STATE | ACTION_REQUIRE_LOCATION | ACTION_JUMP_TO_LINK | ACTION_REQUIRE_LINK),
ACTION_(MAIN, "link-follow-reload", LINK_FOLLOW_RELOAD, N__("Follow the current link, forcing reload of the target"), ACTION_REQUIRE_VIEW_STATE | ACTION_REQUIRE_LOCATION | ACTION_JUMP_TO_LINK | ACTION_REQUIRE_LINK),
ACTION_(MAIN, "link-menu", LINK_MENU, N__("Open the link context menu"), ACTION_REQUIRE_VIEW_STATE | ACTION_JUMP_TO_LINK | ACTION_REQUIRE_LINK),
ACTION_(MAIN, "link-form-menu", LINK_FORM_MENU, N__("Open the form fields menu"), ACTION_REQUIRE_VIEW_STATE | ACTION_JUMP_TO_LINK | ACTION_REQUIRE_LINK | ACTION_REQUIRE_FORM),
ACTION_(MAIN, "lua-console", LUA_CONSOLE, N__("Open a Lua console"), ACTION_RESTRICT_ANONYMOUS),
ACTION_(MAIN, "mark-goto", MARK_GOTO, N__("Go at a specified mark"), ACTION_REQUIRE_VIEW_STATE),
ACTION_(MAIN, "mark-set", MARK_SET, N__("Set a mark"), ACTION_REQUIRE_VIEW_STATE),

View File

@ -204,6 +204,17 @@ kbd_nm_lookup(enum keymap_id keymap_id, unsigned char *name)
return kbd_act_lookup(keymap_id, action_id);
}
static struct keybinding *
kbd_stroke_lookup(enum keymap_id keymap_id, unsigned char *keystroke_str)
{
struct term_event_keyboard kbd;
if (parse_keystroke(keystroke_str, &kbd) < 0)
return NULL;
return kbd_ev_lookup(keymap_id, &kbd, NULL);
}
static struct keymap keymap_table[] = {
{ "main", KEYMAP_MAIN, N_("Main mapping") },
@ -216,6 +227,26 @@ static struct keymap keymap_table[] = {
* Config file helpers.
*/
/* Resolve a keystroke string to the action it is bound to in @keymap_id.
 * Returns NULL when no keybinding is found for the keystroke. */
static struct action *
get_action_from_keystroke(enum keymap_id keymap_id,
                          unsigned char *keystroke_str)
{
	struct keybinding *binding;

	binding = kbd_stroke_lookup(keymap_id, keystroke_str);
	if (!binding)
		return NULL;

	return get_action(keymap_id, binding->action_id);
}
/* Return the name string (action->str) of the action bound to the given
 * keystroke in @keymap_id, or NULL when no action could be resolved. */
unsigned char *
get_action_name_from_keystroke(enum keymap_id keymap_id,
                               unsigned char *keystroke_str)
{
	struct action *bound;

	bound = get_action_from_keystroke(keymap_id, keystroke_str);
	if (!bound)
		return NULL;

	return bound->str;
}
action_id_T
get_action_from_string(enum keymap_id keymap_id, unsigned char *str)
{
@ -854,7 +885,6 @@ unsigned char *
bind_act(unsigned char *keymap_str, unsigned char *keystroke_str)
{
enum keymap_id keymap_id;
struct term_event_keyboard kbd;
unsigned char *action;
struct keybinding *keybinding;
@ -862,10 +892,7 @@ bind_act(unsigned char *keymap_str, unsigned char *keystroke_str)
if (keymap_id < 0)
return NULL;
if (parse_keystroke(keystroke_str, &kbd) < 0)
return NULL;
keybinding = kbd_ev_lookup(keymap_id, &kbd, NULL);
keybinding = kbd_stroke_lookup(keymap_id, keystroke_str);
if (!keybinding) return NULL;
action = get_action_name(keymap_id, keybinding->action_id);

View File

@ -43,6 +43,7 @@ enum action_flags {
ACTION_REQUIRE_LOCATION = (1 << 18),
ACTION_JUMP_TO_LINK = (1 << 19),
ACTION_REQUIRE_LINK = (1 << 20),
ACTION_REQUIRE_FORM = (1 << 21),
ACTION_FLAGS_MASK = (0xFF << 16),
};
@ -123,6 +124,8 @@ void free_keybinding(struct keybinding *);
struct action *get_action(enum keymap_id keymap_id, action_id_T action_id);
unsigned char *get_action_name(enum keymap_id keymap_id, action_id_T action_id);
action_id_T get_action_from_string(enum keymap_id keymap_id, unsigned char *str);
unsigned char *get_action_name_from_keystroke(enum keymap_id keymap_id,
unsigned char *keystroke_str);
static inline unsigned int
action_is_anonymous_safe(enum keymap_id keymap_id, action_id_T action_id)
@ -164,6 +167,14 @@ action_requires_link(enum keymap_id keymap_id, action_id_T action_id)
return action && (action->flags & ACTION_REQUIRE_LINK);
}
/* Tell whether the action has the ACTION_REQUIRE_FORM flag set.
 * Returns 1 when it does, 0 when it does not or when the action is
 * unknown to get_action(). */
static inline unsigned int
action_requires_form(enum keymap_id keymap_id, action_id_T action_id)
{
	struct action *found = get_action(keymap_id, action_id);

	if (!found)
		return 0;

	return (found->flags & ACTION_REQUIRE_FORM) != 0;
}
long read_key(unsigned char *);
unsigned char *get_keymap_name(enum keymap_id);

View File

@ -77,10 +77,9 @@ enum option_type {
OPT_TREE,
};
/* Defined in bfu/listbox.h, later and session/session.h */
struct listbox_item;
struct option;
struct session;
struct listbox_item; /* bfu/listbox.h */
struct option; /* defined later in this file */
struct session; /* session/session.h */
union option_value {
/* XXX: Keep first to make @options_root initialization possible. */

View File

@ -2,8 +2,19 @@ top_builddir=../..
include $(top_builddir)/Makefile.config
OBJS = cookies.o dialogs.o parser.o
PROG = parsetst
parsetst: parser.o parsetst.o
$(call cmd,link) -L../util/libutil.a
PARSETSTDEPS = \
$(top_builddir)/src/util/error.o \
$(top_builddir)/src/util/string.o
ifdef CONFIG_DEBUG
PARSETSTDEPS += $(top_builddir)/src/util/memdebug.o
endif
parsetst: $(PARSETSTDEPS) parser.o parsetst.o
$(call cmd,link)
CLEAN += parsetst.o
include $(top_srcdir)/Makefile.lib

View File

@ -602,9 +602,6 @@ is_path_prefix(unsigned char *d, unsigned char *s)
}
#define is_expired(t) ((t) && (t) <= time(NULL))
#define is_dead(t) (!(t) || (t) <= time(NULL))
struct string *
send_cookies(struct uri *uri)
{
@ -612,6 +609,7 @@ send_cookies(struct uri *uri)
struct cookie *c, *next;
unsigned char *path = NULL;
static struct string header;
time_t now;
if (!uri->host || !uri->data)
return NULL;
@ -626,12 +624,13 @@ send_cookies(struct uri *uri)
init_string(&header);
now = time(NULL);
foreachsafe (c, next, cookies) {
if (!is_in_domain(c->domain, uri->host, uri->hostlen)
|| !is_path_prefix(c->path, path))
continue;
if (is_expired(c->expires)) {
if (c->expires && c->expires <= now) {
#ifdef DEBUG_COOKIES
DBG("Cookie %s=%s (exp %d) expired.",
c->name, c->value, c->expires);
@ -681,6 +680,7 @@ load_cookies(void) {
unsigned char in_buffer[6 * MAX_STR_LEN];
unsigned char *cookfile = COOKIES_FILENAME;
FILE *fp;
time_t now;
if (elinks_home) {
cookfile = straconcat(elinks_home, cookfile, NULL);
@ -701,6 +701,7 @@ load_cookies(void) {
* periodically to our death. */
cookies_nosave = 1;
now = time(NULL);
while (fgets(in_buffer, 6 * MAX_STR_LEN, fp)) {
struct cookie *cookie;
unsigned char *p, *q = in_buffer;
@ -728,7 +729,7 @@ load_cookies(void) {
/* Skip expired cookies if any. */
expires = str_to_time_t(members[EXPIRES].pos);
if (is_dead(expires)) {
if (!expires || expires <= now) {
cookies_dirty = 1;
continue;
}
@ -765,6 +766,7 @@ save_cookies(void) {
struct cookie *c;
unsigned char *cookfile;
struct secure_save_info *ssi;
time_t now;
if (cookies_nosave || !elinks_home || !cookies_dirty
|| get_cmd_opt_bool("anonymous"))
@ -777,8 +779,9 @@ save_cookies(void) {
mem_free(cookfile);
if (!ssi) return;
now = time(NULL);
foreach (c, cookies) {
if (is_dead(c->expires)) continue;
if (!c->expires || c->expires <= now) continue;
if (secure_fprintf(ssi, "%s\t%s\t%s\t%s\t%s\t%ld\t%d\n",
c->name, c->value,
c->server->host,

View File

@ -32,8 +32,6 @@ struct cookie {
time_t expires; /* Expiration time. Zero means undefined */
int secure; /* Did it have 'secure' attribute */
/* This is indeed maintained by cookies.c, not dialogs.c; much easier
* and simpler. */
struct listbox_item *box_item;
};

View File

@ -423,7 +423,7 @@ static struct hierbox_browser_button cookie_buttons[] = {
{ N_("~Add"), push_add_button, 1 },
{ N_("~Edit"), push_edit_button, 1 },
{ N_("~Delete"), push_hierbox_delete_button, 1 },
{ N_("~Clear"), push_hierbox_clear_button, 1 },
{ N_("C~lear"), push_hierbox_clear_button, 1 },
{ N_("Sa~ve"), push_save_button, 0 },
};

View File

@ -2,7 +2,7 @@ top_builddir=../..
include $(top_builddir)/Makefile.config
SUBDIRS-$(CONFIG_CSS) += css
SUBDIRS-$(CONFIG_DOM) += dom sgml
SUBDIRS-$(CONFIG_DOM) += dom
SUBDIRS = html plain

View File

@ -43,12 +43,12 @@ static const struct scan_table_info css_scan_table_info[] = {
SCAN_TABLE_STRING(" \f\n\r\t\v\000", CSS_CHAR_WHITESPACE),
SCAN_TABLE_STRING("\f\n\r", CSS_CHAR_NEWLINE),
SCAN_TABLE_STRING("-", CSS_CHAR_IDENT),
SCAN_TABLE_STRING(".#@!\"'<-/", CSS_CHAR_TOKEN_START),
SCAN_TABLE_STRING(".#@!\"'<-/|^$*", CSS_CHAR_TOKEN_START),
/* Unicode escape (that we do not handle yet) + other special chars */
SCAN_TABLE_STRING("\\_*", CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
SCAN_TABLE_STRING("\\_", CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* This should contain mostly used char tokens like ':' and maybe a few
* garbage chars that people might put in their CSS code */
SCAN_TABLE_STRING("({});:,.>", CSS_CHAR_TOKEN),
SCAN_TABLE_STRING("[({})];:,.>+~", CSS_CHAR_TOKEN),
SCAN_TABLE_STRING("<![CDATA]->", CSS_CHAR_SGML_MARKUP),
SCAN_TABLE_END,
@ -259,6 +259,32 @@ scan_css_token(struct scanner *scanner, struct scanner_token *token)
CSS_TOKEN_AT_KEYWORD);
}
} else if (first_char == '*') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_CONTAINS;
string++;
} else {
type = CSS_TOKEN_IDENT;
}
} else if (first_char == '^') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_BEGIN;
string++;
}
} else if (first_char == '$') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_END;
string++;
}
} else if (first_char == '|') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_HYPHEN_LIST;
string++;
}
} else if (first_char == '!') {
scan_css(scanner, string, CSS_CHAR_WHITESPACE);
if (!strncasecmp(string, "important", 9)) {

View File

@ -69,7 +69,12 @@ enum css_token_type {
CSS_TOKEN_IMPORTANT, /* !<whitespace>important */
/* TODO: Selector stuff like "|=" and "~=" */
/* TODO: Selector stuff: */
CSS_TOKEN_SELECT_SPACE_LIST, /* ~= */
CSS_TOKEN_SELECT_HYPHEN_LIST, /* |= */
CSS_TOKEN_SELECT_BEGIN, /* ^= */
CSS_TOKEN_SELECT_END, /* $= */
CSS_TOKEN_SELECT_CONTAINS, /* *= */
/* Special tokens: */

View File

@ -1,6 +1,6 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
OBJS = node.o renderer.o stack.o
OBJS = renderer.o
include $(top_srcdir)/Makefile.lib

View File

@ -4,6 +4,10 @@
#include "config.h"
#endif
#include <sys/types.h> /* FreeBSD needs this before regex.h */
#ifdef HAVE_REGEX_H
#include <regex.h>
#endif
#include <string.h>
#include "elinks.h"
@ -16,11 +20,12 @@
#include "document/css/stylesheet.h"
#include "document/docdata.h"
#include "document/document.h"
#include "document/dom/node.h"
#include "document/dom/renderer.h"
#include "document/dom/stack.h"
#include "document/renderer.h"
#include "document/sgml/parser.h"
#include "dom/scanner.h"
#include "dom/sgml/parser.h"
#include "dom/node.h"
#include "dom/stack.h"
#include "intl/charsets.h"
#include "globhist/globhist.h" /* get_global_history_item() */
#include "protocol/uri.h"
@ -28,7 +33,6 @@
#include "util/box.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/scanner.h"
#include "util/snprintf.h"
#include "util/string.h"
@ -45,9 +49,15 @@ struct dom_renderer {
unsigned char *position;
int canvas_x, canvas_y;
#ifdef HAVE_REGEX_H
regex_t url_regex;
unsigned int find_url:1;
#endif
struct screen_char styles[DOM_NODES];
};
#define URL_REGEX "(file://|((f|ht|nt)tp(s)?|smb)://[[:alnum:]]+([-@:.]?[[:alnum:]])*\\.[[:alpha:]]{2,4}(:[[:digit:]]+)?)(/(%[[:xdigit:]]{2}|[-_~&=;?.a-z0-9])*)*"
#define URL_REGFLAGS (REG_ICASE | REG_EXTENDED)
static void
init_template(struct screen_char *template, struct document_options *options,
@ -91,14 +101,23 @@ init_dom_renderer(struct dom_renderer *renderer, struct document *document,
renderer->end = buffer->source + buffer->length;
renderer->position = renderer->source;
#ifdef HAVE_REGEX_H
if (renderer->document->options.plain_display_links) {
if (regcomp(&renderer->url_regex, URL_REGEX, URL_REGFLAGS)) {
regfree(&renderer->url_regex);
} else {
renderer->find_url = 1;
}
}
#endif
for (type = 0; type < DOM_NODES; type++) {
struct screen_char *template = &renderer->styles[type];
color_T background = document->options.default_bg;
color_T foreground = document->options.default_fg;
static int i_want_struct_module_for_dom;
unsigned char *name = get_dom_node_type_name(type);
int namelen = name ? strlen(name) : 0;
struct dom_string *name = get_dom_node_type_name(type);
struct css_selector *selector = NULL;
if (!i_want_struct_module_for_dom) {
@ -108,7 +127,8 @@ init_dom_renderer(struct dom_renderer *renderer, struct document *document,
"entity-reference { color: red } "
"proc-instruction { color: red } "
"attribute { color: magenta } "
"comment { color: aqua } ";
"comment { color: aqua } "
"cdata-section { color: orange2 } ";
unsigned char *styles = (unsigned char *) default_colors;
i_want_struct_module_for_dom = 1;
@ -119,9 +139,10 @@ init_dom_renderer(struct dom_renderer *renderer, struct document *document,
}
if (name)
if (is_dom_string_set(name))
selector = find_css_selector(&css->selectors,
CST_ELEMENT, CSR_ROOT,
name, namelen);
name->string, name->length);
if (selector) {
struct list_head *properties = &selector->properties;
@ -286,33 +307,6 @@ render_dom_text(struct dom_renderer *renderer, struct screen_char *template,
}
}
#ifdef DOM_TREE_RENDERER
/* Format a printf-style message and render it with render_dom_text().
 *
 * The message is formatted twice: a first vsnprintf() pass on a copy of the
 * argument list measures the required length, the second pass writes into a
 * buffer of exactly that size. Nothing is rendered when the length cannot
 * be determined, when the allocation fails, or when the second pass does
 * not produce the measured length. */
static void
render_dom_printf(struct dom_renderer *renderer, struct screen_char *template,
		  unsigned char *format, ...)
{
	unsigned char *text;
	int textlen;
	va_list ap, ap2;

	va_start(ap, format);
	VA_COPY(ap2, ap);

	textlen = vsnprintf(NULL, 0, format, ap2);

	/* Every copied va_list has to be released, just like one set up by
	 * va_start(). */
	va_end(ap2);

	/* A negative result signals a formatting error; do not turn it into
	 * an allocation size. */
	if (textlen < 0) goto free_va_args;

	text = mem_alloc(textlen + 1);
	if (!text) goto free_va_args;

	if (vsnprintf((char *) text, textlen + 1, format, ap) == textlen)
		render_dom_text(renderer, template, text, textlen);

	mem_free(text);

free_va_args:
	va_end(ap);
}
#endif /* DOM_TREE_RENDERER */
#define realloc_document_links(doc, size) \
ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
@ -382,112 +376,6 @@ add_dom_link(struct dom_renderer *renderer, unsigned char *string, int length)
}
/* DOM Tree Renderer */
#ifdef DOM_TREE_RENDERER
/* Tree renderer callback: print one "name: string" line for @node using
 * the style registered for its node type. Always returns @node. */
static struct dom_node *
render_dom_tree(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *renderer = stack->renderer;
	struct screen_char *style = &renderer->styles[node->type];
	unsigned char *label;
	unsigned char *text;

	assert(node && renderer);

	/* Both strings are released again below. */
	label = get_dom_node_name(node);
	text = memacpy(node->string, node->length);

	render_dom_printf(renderer, style, "%-16s: %s\n", label, text);

	mem_free_if(text);
	mem_free_if(label);

	return node;
}
/* Tree renderer callback: print a "type: name -> value" line for @node,
 * shifted right by the current stack depth. Always returns @node. */
static struct dom_node *
render_dom_tree_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *renderer = stack->renderer;
	struct document *document = renderer->document;
	struct screen_char *style = &renderer->styles[node->type];
	unsigned char *label;
	unsigned char *text;
	unsigned char *type_name;

	assert(node && document);

	/* The name and value are released below; the type name is not. */
	label = get_dom_node_name(node);
	text = get_dom_node_value(node, document->options.cp);
	type_name = get_dom_node_type_name(node->type);

	/* Indent by nesting depth before emitting the line. */
	renderer->canvas_x += stack->depth;
	render_dom_printf(renderer, style, "%-16s: %s -> %s\n",
			  type_name, label, text);

	mem_free_if(text);
	mem_free_if(label);

	return node;
}
/* Tree renderer callback: print a "name: value" line for @node, shifted
 * right by the current stack depth. Always returns @node. */
static struct dom_node *
render_dom_tree_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *renderer = stack->renderer;
	struct document *document = renderer->document;
	struct screen_char *style = &renderer->styles[node->type];
	unsigned char *label;
	unsigned char *text;

	assert(node && document);

	/* Both strings are released again below. */
	label = get_dom_node_name(node);
	text = get_dom_node_value(node, document->options.cp);

	/* Indent by nesting depth before emitting the line. */
	renderer->canvas_x += stack->depth;
	render_dom_printf(renderer, style, "%-16s: %s\n", label, text);

	mem_free_if(text);
	mem_free_if(label);

	return node;
}
/* Tree renderer callback: print a "type: name" line for @node, shifted
 * right by the current stack depth. Always returns @node. */
static struct dom_node *
render_dom_tree_branch(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *renderer = stack->renderer;
	struct document *document = renderer->document;
	struct screen_char *style = &renderer->styles[node->type];
	unsigned char *label;
	unsigned char *type_name;

	assert(node && document);

	/* Only the node name is released below; the type name is not. */
	label = get_dom_node_name(node);
	type_name = get_dom_node_type_name(node->type);

	/* Indent by nesting depth before emitting the line. */
	renderer->canvas_x += stack->depth;
	render_dom_printf(renderer, style, "%-16s: %s\n", type_name, label);

	mem_free_if(label);

	return node;
}
/* Callbacks of the DOM tree renderer, one slot per DOM node type (the array
 * has DOM_NODES entries). The first slot carries no node type label and has
 * no callback. Element nodes are printed as branches, text and comment
 * nodes as plain leaves, the document node by render_dom_tree() and all
 * remaining types as leaves prefixed with their type name. */
static dom_stack_callback_T dom_tree_renderer_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_tree_branch,
/* DOM_NODE_ATTRIBUTE */ render_dom_tree_id_leaf,
/* DOM_NODE_TEXT */ render_dom_tree_leaf,
/* DOM_NODE_CDATA_SECTION */ render_dom_tree_id_leaf,
/* DOM_NODE_ENTITY_REFERENCE */ render_dom_tree_id_leaf,
/* DOM_NODE_ENTITY */ render_dom_tree_id_leaf,
/* DOM_NODE_PROC_INSTRUCTION */ render_dom_tree_id_leaf,
/* DOM_NODE_COMMENT */ render_dom_tree_leaf,
/* DOM_NODE_DOCUMENT */ render_dom_tree,
/* DOM_NODE_DOCUMENT_TYPE */ render_dom_tree_id_leaf,
/* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_tree_id_leaf,
/* DOM_NODE_NOTATION */ render_dom_tree_id_leaf,
};
#endif /* DOM_TREE_RENDERER */
/* DOM Source Renderer */
#define check_dom_node_source(renderer, str, len) \
@ -517,8 +405,8 @@ static inline void
render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template,
struct dom_node *node)
{
unsigned char *string = node->string;
int length = node->length;
unsigned char *string = node->string.string;
int length = node->string.length;
if (node->type == DOM_NODE_ENTITY_REFERENCE) {
string -= 1;
@ -534,72 +422,98 @@ render_dom_node_text(struct dom_renderer *renderer, struct screen_char *template
render_dom_text(renderer, template, string, length);
}
static struct dom_node *
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
#ifdef HAVE_REGEX_H
static inline void
render_dom_node_enhanced_text(struct dom_renderer *renderer, struct dom_node *node)
{
struct dom_renderer *renderer = stack->renderer;
regex_t *regex = &renderer->url_regex;
regmatch_t regmatch;
unsigned char *string = node->string.string;
int length = node->string.length;
struct screen_char *template = &renderer->styles[node->type];
unsigned char *alloc_string;
assert(node && renderer && renderer->document);
/* TODO: For (at least) text, CDATA section and comment nodes, check
* for URIs a la document->options.plain_display_links */
render_dom_node_text(renderer, &renderer->styles[node->type], node);
return node;
}
static struct dom_node *
render_dom_proc_instr_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->renderer;
unsigned char *value;
int valuelen;
assert(node && renderer && renderer->document);
render_dom_node_text(renderer, &renderer->styles[node->type], node);
value = node->data.proc_instruction.instruction;
valuelen = node->data.proc_instruction.instructionlen;
if (!value || node->data.proc_instruction.map)
return node;
if (check_dom_node_source(renderer, node->string, node->length)) {
render_dom_flush(renderer, value);
renderer->position = value + valuelen;
if (check_dom_node_source(renderer, string, length)) {
render_dom_flush(renderer, string);
renderer->position = string + length;
assert_source(renderer, renderer->position, 0);
}
render_dom_text(renderer, &renderer->styles[DOM_NODE_ATTRIBUTE], value, valuelen);
alloc_string = memacpy(string, length);
if (alloc_string)
string = alloc_string;
return node;
while (length > 0 && !regexec(regex, string, 1, &regmatch, 0)) {
int matchlen = regmatch.rm_eo - regmatch.rm_so;
int offset = regmatch.rm_so;
if (!matchlen || offset < 0 || regmatch.rm_eo > length)
break;
if (offset > 0)
render_dom_text(renderer, template, string, offset);
string += offset;
length -= offset;
add_dom_link(renderer, string, matchlen);
length -= matchlen;
string += matchlen;
}
if (length > 0)
render_dom_text(renderer, template, string, length);
mem_free_if(alloc_string);
}
#endif
/* Source renderer push callback for nodes that are rendered as plain text.
 * When URL detection is compiled in and enabled for this renderer, text,
 * CDATA section and comment nodes take the enhanced path instead; every
 * other node is rendered with the style of its node type. */
static void
render_dom_node_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *renderer = stack->current->data;

	assert(node && renderer && renderer->document);

#ifdef HAVE_REGEX_H
	if (renderer->find_url) {
		switch (node->type) {
		case DOM_NODE_TEXT:
		case DOM_NODE_CDATA_SECTION:
		case DOM_NODE_COMMENT:
			render_dom_node_enhanced_text(renderer, node);
			return;

		default:
			/* Other node types fall through to plain
			 * rendering. */
			break;
		}
	}
#endif

	render_dom_node_text(renderer, &renderer->styles[node->type], node);
}
static struct dom_node *
/* This callback is also used for rendering processing instruction nodes. */
static void
render_dom_element_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->renderer;
struct dom_renderer *renderer = stack->current->data;
assert(node && renderer && renderer->document);
render_dom_node_text(renderer, &renderer->styles[node->type], node);
return node;
}
static struct dom_node *
static void
render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->renderer;
struct sgml_parser_state *pstate = data;
struct scanner_token *token = &pstate->end_token;
unsigned char *string = token->string;
int length = token->length;
struct dom_renderer *renderer = stack->current->data;
struct dom_stack_state *state = get_dom_stack_top(stack);
struct sgml_parser_state *pstate = get_dom_stack_state_data(stack->contexts[0], state);
struct dom_scanner_token *token = &pstate->end_token;
unsigned char *string = token->string.string;
int length = token->string.length;
assert(node && renderer && renderer->document);
if (!string || !length)
return node;
return;
if (check_dom_node_source(renderer, string, length)) {
render_dom_flush(renderer, string);
@ -608,47 +522,22 @@ render_dom_element_end_source(struct dom_stack *stack, struct dom_node *node, vo
}
render_dom_text(renderer, &renderer->styles[node->type], string, length);
return node;
}
static struct dom_node *
static void
render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->renderer;
struct dom_renderer *renderer = stack->current->data;
struct screen_char *template = &renderer->styles[node->type];
assert(node && renderer->document);
#if 0
/* Disabled since the DOM source highlighter uses the stream parser and
* therefore the attributes is pushed to it in order. However, if/when
* we will support rendering (read saving) of loaded DOM trees this one
* small hack is needed to get the attributes in the original order. */
{
struct dom_stack_state *state = get_dom_stack_parent(stack);
struct dom_node *attribute = NULL;
int i;
assert(state && state->list);
/* The attributes are sorted but we want them in the original order */
foreach_dom_node(i, node, state->list) {
if (node->string >= renderer->position
&& (!attribute || node->string < attribute->string))
attribute = node;
}
assert(attribute);
node = attribute;
}
#endif
render_dom_node_text(renderer, template, node);
if (node->data.attribute.value) {
if (is_dom_string_set(&node->data.attribute.value)) {
int quoted = node->data.attribute.quoted == 1;
unsigned char *value = node->data.attribute.value - quoted;
int valuelen = node->data.attribute.valuelen + quoted * 2;
unsigned char *value = node->data.attribute.value.string - quoted;
int valuelen = node->data.attribute.value.length + quoted * 2;
if (check_dom_node_source(renderer, value, 0)) {
render_dom_flush(renderer, value);
@ -699,40 +588,74 @@ render_dom_attribute_source(struct dom_stack *stack, struct dom_node *node, void
render_dom_text(renderer, template, value, valuelen);
}
}
return node;
}
/* Push callbacks of the DOM source renderer, one slot per DOM node type
 * (the array has DOM_NODES entries). The unlabeled first slot and the
 * document node have no callback. Elements, attributes and processing
 * instructions have dedicated renderers; every other node type goes through
 * render_dom_node_source(). */
static dom_stack_callback_T dom_source_renderer_push_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_source,
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
/* DOM_NODE_TEXT */ render_dom_node_source,
/* DOM_NODE_CDATA_SECTION */ render_dom_node_source,
/* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source,
/* DOM_NODE_ENTITY */ render_dom_node_source,
/* DOM_NODE_PROC_INSTRUCTION */ render_dom_proc_instr_source,
/* DOM_NODE_COMMENT */ render_dom_node_source,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source,
/* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source,
/* DOM_NODE_NOTATION */ render_dom_node_source,
};
static void
render_dom_cdata_source(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct dom_renderer *renderer = stack->current->data;
unsigned char *string = node->string.string;
static dom_stack_callback_T dom_source_renderer_pop_callbacks[DOM_NODES] = {
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_end_source,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
assert(node && renderer && renderer->document);
/* Highlight the 'CDATA' part of <![CDATA[ if it is there. */
if (check_dom_node_source(renderer, string - 6, 6)) {
render_dom_flush(renderer, string - 6);
render_dom_text(renderer, &renderer->styles[DOM_NODE_ATTRIBUTE], string - 6, 5);
renderer->position = string - 1;
assert_source(renderer, renderer->position, 0);
}
render_dom_node_text(renderer, &renderer->styles[node->type], node);
}
/* Pop callback for the document node: emit whatever source is still pending
 * once the whole document has been handled. */
static void
render_dom_document_end(struct dom_stack *stack, struct dom_node *node, void *data)
{
	struct dom_renderer *dom_renderer = stack->current->data;

	/* Nothing left to do unless the current position still refers to
	 * valid source. */
	if (!check_dom_node_source(dom_renderer, dom_renderer->position, 0))
		return;

	/* When no non-element node follows the last element node, the tail
	 * of the cache entry source, including the '>' of the last element
	 * tag if it has one, has not been flushed yet. (bug 519) */
	render_dom_flush(dom_renderer, dom_renderer->end);
}
/* Stack context description for the DOM source renderer: the per-state
 * object size followed by the push and pop callback tables, each with one
 * slot per DOM node type. NULL slots register no callback. */
static struct dom_stack_context_info dom_source_renderer_context_info = {
/* Object size: */ 0,
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_source,
/* DOM_NODE_ATTRIBUTE */ render_dom_attribute_source,
/* DOM_NODE_TEXT */ render_dom_node_source,
/* DOM_NODE_CDATA_SECTION */ render_dom_cdata_source,
/* DOM_NODE_ENTITY_REFERENCE */ render_dom_node_source,
/* DOM_NODE_ENTITY */ render_dom_node_source,
/* DOM_NODE_PROC_INSTRUCTION */ render_dom_element_source,
/* DOM_NODE_COMMENT */ render_dom_node_source,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ render_dom_node_source,
/* DOM_NODE_DOCUMENT_FRAGMENT */ render_dom_node_source,
/* DOM_NODE_NOTATION */ render_dom_node_source,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ render_dom_element_end_source,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* The document pop callback flushes any remaining source. */
/* DOM_NODE_DOCUMENT */ render_dom_document_end,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
@ -746,6 +669,11 @@ render_dom_document(struct cache_entry *cached, struct document *document,
struct dom_renderer renderer;
struct conv_table *convert_table;
struct sgml_parser *parser;
enum sgml_document_type doctype;
unsigned char *string = struri(cached->uri);
size_t length = strlen(string);
struct dom_string uri = INIT_DOM_STRING(string, length);
struct dom_string source = INIT_DOM_STRING(buffer->source, buffer->length);
assert(document->options.plain);
@ -759,21 +687,31 @@ render_dom_document(struct cache_entry *cached, struct document *document,
document->bgcolor = document->options.default_bg;
parser = init_sgml_parser(SGML_PARSER_STREAM, &renderer, cached->uri,
dom_source_renderer_push_callbacks,
dom_source_renderer_pop_callbacks);
if (cached->content_type
&& !strlcasecmp("application/rss+xml", 19, cached->content_type, -1))
doctype = SGML_DOCTYPE_RSS;
else
doctype = SGML_DOCTYPE_HTML;
parser = init_sgml_parser(SGML_PARSER_STREAM, doctype, &uri);
if (!parser) return;
root = parse_sgml(parser, buffer);
done_sgml_parser(parser);
if (!root) return;
add_dom_stack_context(&parser->stack, &renderer,
&dom_source_renderer_context_info);
/* If there are no non-element nodes after the last element node make
* sure that we flush to the end of the cache entry source including
* the '>' of the last element tag if it has one. (bug 519) */
if (check_dom_node_source(&renderer, renderer.position, 0)) {
render_dom_flush(&renderer, renderer.end);
root = parse_sgml(parser, &source);
if (root) {
assert(parser->stack.depth == 1);
get_dom_stack_top(&parser->stack)->immutable = 0;
/* For SGML_PARSER_STREAM this will free the DOM
* root node. */
pop_dom_node(&parser->stack);
}
done_dom_node(root);
#ifdef HAVE_REGEX_H
if (renderer.find_url)
regfree(&renderer.url_regex);
#endif
done_sgml_parser(parser);
}

View File

@ -1,279 +0,0 @@
/* The DOM tree navigation interface */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "document/dom/node.h"
#include "document/dom/stack.h"
#include "util/memory.h"
#include "util/string.h"
/* Navigator states */
#define DOM_STACK_STATE_GRANULARITY 0x7
#define DOM_STACK_CALLBACKS_SIZE (sizeof(dom_stack_callback_T) * DOM_NODES)
/* Make sure the state array pointed to by @states has room for one more
 * entry than the current @size. Returns what mem_align_alloc() returns. */
static inline struct dom_stack_state *
realloc_dom_stack_states(struct dom_stack_state **states, size_t size)
{
	size_t grown_size = size + 1;

	return mem_align_alloc(states, size, grown_size,
			       struct dom_stack_state,
			       DOM_STACK_STATE_GRANULARITY);
}
/* Make sure stack->state_objects has room for one more object (depth + 1
 * objects of stack->object_size bytes each). The allocator is called
 * directly rather than through mem_align_alloc(), because the element size
 * is only known at run time. */
static inline unsigned char *
realloc_dom_stack_state_objects(struct dom_stack *stack)
{
#ifdef DEBUG_MEMLEAK
/* The leak-debugging variant takes the call site as extra arguments. */
return mem_align_alloc__(__FILE__, __LINE__, (void **) &stack->state_objects,
stack->depth, stack->depth + 1,
stack->object_size,
DOM_STACK_STATE_GRANULARITY);
#else
return mem_align_alloc__((void **) &stack->state_objects,
stack->depth, stack->depth + 1,
stack->object_size,
DOM_STACK_STATE_GRANULARITY);
#endif
}
/* Reset @stack and configure it for use.
 *
 * @parser and @renderer are opaque pointers stored on the stack.
 * @push_callbacks and @pop_callbacks are optional per-node-type callback
 * tables; when given they are copied into the stack.
 * @object_size is the size of the per-state object (zero for none).
 * @keep_nodes is normalized to a boolean and stored in the stack. */
void
init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
	       dom_stack_callback_T push_callbacks[DOM_NODES],
	       dom_stack_callback_T pop_callbacks[DOM_NODES],
	       size_t object_size, int keep_nodes)
{
	assert(stack);

	/* Start from a clean slate; absent callback tables stay all-NULL. */
	memset(stack, 0, sizeof(*stack));

	if (pop_callbacks) {
		memcpy(stack->pop_callbacks, pop_callbacks,
		       DOM_STACK_CALLBACKS_SIZE);
	}

	if (push_callbacks) {
		memcpy(stack->push_callbacks, push_callbacks,
		       DOM_STACK_CALLBACKS_SIZE);
	}

	stack->keep_nodes  = keep_nodes ? 1 : 0;
	stack->object_size = object_size;
	stack->renderer    = renderer;
	stack->parser      = parser;
}
/* Release the memory owned by @stack and wipe the structure. The nodes
 * referenced by the states are not touched here. */
void
done_dom_stack(struct dom_stack *stack)
{
	assert(stack);

	mem_free_if(stack->state_objects);
	mem_free_if(stack->states);

	/* Leave no dangling pointers or stale depth behind. */
	memset(stack, 0, sizeof(*stack));
}
/* Push @node on @stack and run the push callback registered for its type.
 *
 * Returns the node the stack ended up with: @node itself when no callback
 * is registered, otherwise whatever the callback returned. Returns NULL when
 * the depth limit is exceeded, when growing the stack fails, or when the
 * callback returns NULL (in which case the new state is popped again).
 *
 * @node is released with done_dom_node() only on the two allocation
 * failure paths. */
struct dom_node *
push_dom_node(struct dom_stack *stack, struct dom_node *node)
{
dom_stack_callback_T callback;
struct dom_stack_state *state;
assert(stack && node);
assert(0 < node->type && node->type < DOM_NODES);
/* NOTE(review): this path returns NULL without calling done_dom_node(),
 * although the prototype comment in stack.h says the node is freed on
 * error -- confirm which behaviour callers rely on. */
if (stack->depth > DOM_STACK_MAX_DEPTH) {
return NULL;
}
state = realloc_dom_stack_states(&stack->states, stack->depth);
if (!state) {
done_dom_node(node);
return NULL;
}
/* Advance from the start of the array to the new top slot. */
state += stack->depth;
if (stack->object_size) {
unsigned char *state_objects;
state_objects = realloc_dom_stack_state_objects(stack);
if (!state_objects) {
done_dom_node(node);
return NULL;
}
/* The depth is what get_dom_stack_state_data() uses to locate the
 * object belonging to this state. */
state->depth = stack->depth;
}
state->node = node;
/* Grow the state array to the new depth so the state accessors work
 * in the callbacks */
stack->depth++;
callback = stack->push_callbacks[node->type];
if (callback) {
void *state_data = get_dom_stack_state_data(stack, state);
node = callback(stack, node, state_data);
/* If the callback returned NULL pop the state immediately */
if (!node) {
memset(state, 0, sizeof(*state));
stack->depth--;
/* stack->depth is a size_t, so this can never fail. */
assert(stack->depth >= 0);
}
}
return node;
}
/* Pop the top state of @stack: run the pop callback registered for the
 * node's type, release the node unless the stack keeps nodes, and clear the
 * state and its state object.
 *
 * Returns non-zero when the popped state is @parent, which lets callers
 * popping several states know when to stop. Returns 0 without doing anything
 * when the stack is empty. */
static int
do_pop_dom_node(struct dom_stack *stack, struct dom_stack_state *parent)
{
struct dom_stack_state *state;
dom_stack_callback_T callback;
assert(stack);
if (!dom_stack_has_parents(stack)) return 0;
state = get_dom_stack_top(stack);
callback = stack->pop_callbacks[state->node->type];
if (callback) {
void *state_data = get_dom_stack_state_data(stack, state);
/* The return value of pop callbacks is ignored. */
callback(stack, state->node, state_data);
}
if (!stack->keep_nodes)
done_dom_node(state->node);
stack->depth--;
/* stack->depth is a size_t, so this can never fail. */
assert(stack->depth >= 0);
if (stack->object_size) {
/* The object is located through state->depth, so it is still
 * reachable after the stack depth has been decremented. */
void *state_data = get_dom_stack_state_data(stack, state);
memset(state_data, 0, stack->object_size);
}
memset(state, 0, sizeof(*state));
return state == parent;
}
/* Pop the top state of @stack, if there is one. */
void
pop_dom_node(struct dom_stack *stack)
{
	assert(stack);

	if (dom_stack_has_parents(stack)) {
		struct dom_stack_state *parent = get_dom_stack_parent(stack);

		do_pop_dom_node(stack, parent);
	}
}
/* Search the stack for a state whose node has the given @type and name
 * (@string, @length) and, if one is found, pop every state down to and
 * including it. Does nothing when no such state is on the stack. */
void
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
	      unsigned char *string, uint16_t length)
{
	struct dom_stack_state *match;

	if (!dom_stack_has_parents(stack))
		return;

	match = search_dom_stack(stack, type, string, length);
	if (!match)
		return;

	pop_dom_state(stack, type, match);
}
void
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
struct dom_stack_state *target)
{
struct dom_stack_state *state;
unsigned int pos;
if (!target) return;
if (!dom_stack_has_parents(stack)) return;
foreachback_dom_state (stack, state, pos) {
if (do_pop_dom_node(stack, target))
break;;
}
}
/* Walk the tree rooted at @root by pushing and popping nodes on @stack, so
 * that the stack's push and pop callbacks are run for the visited nodes.
 * Each stack state records which node list of its node is being iterated
 * and how far the iteration has come. */
void
walk_dom_nodes(struct dom_stack *stack, struct dom_node *root)
{
assert(root && stack);
push_dom_node(stack, root);
while (dom_stack_has_parents(stack)) {
struct dom_stack_state *state = get_dom_stack_top(stack);
struct dom_node_list *list = state->list;
struct dom_node *node = state->node;
/* Figure out which list of the node to take the next child from. */
switch (node->type) {
case DOM_NODE_DOCUMENT:
if (!list) list = node->data.document.children;
break;
case DOM_NODE_ELEMENT:
/* Elements have two lists: first the attribute map is
 * iterated, then the children. */
if (!list) list = node->data.element.map;
if (list == node->data.element.children) break;
if (is_dom_node_list_member(list, state->index)
&& list == node->data.element.map)
break;
list = node->data.element.children;
break;
case DOM_NODE_PROCESSING_INSTRUCTION:
if (!list) list = node->data.proc_instruction.map;
break;
case DOM_NODE_DOCUMENT_TYPE:
/* Same two-step scheme as for elements: entities first,
 * then notations. */
if (!list) list = node->data.document_type.entities;
if (list == node->data.document_type.notations) break;
if (is_dom_node_list_member(list, state->index)
&& list == node->data.document_type.entities)
break;
list = node->data.document_type.notations;
break;
/* The remaining node types have no lists to descend into. */
case DOM_NODE_ATTRIBUTE:
case DOM_NODE_TEXT:
case DOM_NODE_CDATA_SECTION:
case DOM_NODE_COMMENT:
case DOM_NODE_NOTATION:
case DOM_NODE_DOCUMENT_FRAGMENT:
case DOM_NODE_ENTITY_REFERENCE:
case DOM_NODE_ENTITY:
default:
break;
}
/* Reset list state if it is a new list */
if (list != state->list) {
state->list = list;
state->index = 0;
}
/* If we have next child node */
if (is_dom_node_list_member(list, state->index)) {
struct dom_node *child = list->entries[state->index++];
/* Descend into the child; if the push fails fall through
 * and pop the current node instead. */
if (push_dom_node(stack, child))
continue;
}
/* No more children to visit, so ascend. */
pop_dom_node(stack);
}
}

View File

@ -1,138 +0,0 @@
#ifndef EL__DOCUMENT_DOM_STACK_H
#define EL__DOCUMENT_DOM_STACK_H
#include "document/document.h"
#include "document/dom/node.h"
#include "util/error.h"
#include "util/hash.h"
struct dom_stack;
/* Callback run when a node is pushed on or popped off a stack. It is given
 * the stack, the node and the state object belonging to the node's state.
 * When a push callback returns NULL the new state is popped again
 * immediately; the return value of pop callbacks is ignored. */
typedef struct dom_node *
(*dom_stack_callback_T)(struct dom_stack *, struct dom_node *, void *);
/* push_dom_node() refuses to push once the stack depth exceeds this. */
#define DOM_STACK_MAX_DEPTH 4096
/* One entry of the DOM stack, describing a node being visited. */
struct dom_stack_state {
/* The node this state belongs to. */
struct dom_node *node;
/* Used for recording which node list is currently being descended
 * into. E.g. whether we are iterating all child elements or attributes
 * of an element. */
struct dom_node_list *list;
/* The index (in the list above) which is currently being handled. */
size_t index;
/* The depth of the state in the stack. This is amongst other things
 * used to get the state object data. */
unsigned int depth;
};
/* The DOM stack is a convenient way to traverse DOM trees. It also
 * maintains the needed state info and is therefore also the holder of the
 * current context, since the stack is used when the DOM tree is
 * manipulated. */
struct dom_stack {
/* The stack of nodes */
struct dom_stack_state *states;
/* The number of states currently on the stack. */
size_t depth;
/* Keep nodes when popping them or call done_dom_node() on them. */
unsigned int keep_nodes:1;
/* This is one big array of parser specific objects. */
/* The objects hold parser specific data. For the SGML parser this
 * holds DTD-oriented info about the node (recorded in struct
 * sgml_node_info). E.g. whether an element node is optional. */
unsigned char *state_objects;
/* The size of each object in the array above; zero means that no
 * objects are allocated. */
size_t object_size;
/* Renderer specific callbacks for the streaming parser mode. */
dom_stack_callback_T push_callbacks[DOM_NODES];
dom_stack_callback_T pop_callbacks[DOM_NODES];
/* Data specific to the parser and renderer. */
void *renderer;
void *parser;
};
/* True when the stack has allocated states and holds at least one state.
 * Note that this also holds when only the bottom state is on the stack. */
#define dom_stack_has_parents(nav) \
((nav)->states && (nav)->depth > 0)
/* Get the state that is @top_offset entries below the top of @stack; an
 * offset of zero gives the top state itself. */
static inline struct dom_stack_state *
get_dom_stack_state(struct dom_stack *stack, int top_offset)
{
/* NOTE(review): stack->depth is a size_t, so where size_t is at least as
 * wide as int this difference is computed in unsigned arithmetic and the
 * test cannot fail; an offset at or beyond the depth is not caught. */
assertm(stack->depth - 1 - top_offset >= 0,
"Attempting to access invalid state");
return &stack->states[stack->depth - 1 - top_offset];
}
/* The state just below the top state, and the top state itself. */
#define get_dom_stack_parent(nav) get_dom_stack_state(nav, 1)
#define get_dom_stack_top(nav) get_dom_stack_state(nav, 0)
/* Get the state object of @state. It is located using the depth recorded
 * in the state and the object size of the stack. */
#define get_dom_stack_state_data(stack, state) \
((void *) &(stack)->state_objects[(state)->depth * (stack)->object_size])
/* The state iterators do not include the bottom state */
/* Iterate from the state above the bottom state up to the top state. */
#define foreach_dom_state(nav, item, pos) \
for ((pos) = 1; (pos) < (nav)->depth; (pos)++) \
if (((item) = &(nav)->states[(pos)]))
/* Iterate from the top state down to the state above the bottom state. */
#define foreachback_dom_state(nav, item, pos) \
for ((pos) = (nav)->depth - 1; (pos) > 0; (pos)--) \
if (((item) = &(nav)->states[(pos)]))
/* Look through the stack states, from the top and downwards, for a node of
 * the given @type whose name matches @string (of @length bytes), ignoring
 * case. The bottom state is not considered. Returns the matching state or
 * NULL when there is none. */
static inline struct dom_stack_state *
search_dom_stack(struct dom_stack *stack, enum dom_node_type type,
		 unsigned char *string, uint16_t length)
{
	struct dom_stack_state *state;
	int pos;

	/* FIXME: Take node subtype and compare if non-zero or something. */
	foreachback_dom_state (stack, state, pos) {
		struct dom_node *candidate = state->node;

		if (candidate->type != type)
			continue;

		if (candidate->length != length)
			continue;

		if (strncasecmp(candidate->string, string, length))
			continue;

		return state;
	}

	return NULL;
}
/* Life cycle functions. */
/* The @object_size arg tells whether the stack should allocate objects for
 * each state, and how big they should be. Zero means no state data should
 * be allocated. The optional callback tables are copied into the stack. */
void init_dom_stack(struct dom_stack *stack, void *parser, void *renderer,
dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES],
size_t object_size, int keep_nodes);
/* Frees the memory allocated by the stack and resets it. */
void done_dom_stack(struct dom_stack *stack);
/* Descends down to the given node making it the current parent */
/* If an error occurs the node is free()d and NULL is returned */
struct dom_node *push_dom_node(struct dom_stack *stack, struct dom_node *node);
/* Ascends the stack to the current parent */
void pop_dom_node(struct dom_stack *stack);
/* Ascends the stack looking for a specific parent */
void pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
unsigned char *string, uint16_t length);
/* Pop all stack states until a specific state is reached. */
void
pop_dom_state(struct dom_stack *stack, enum dom_node_type type,
struct dom_stack_state *target);
/* Visit each node in the tree rooted at @root pre-order */
void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root);
#endif

View File

@ -417,8 +417,6 @@ abort:
if (!closing_tag) {
unsigned char *value, *label;
add_select_item(&lnk_menu, &lbl, &orig_lbl, values, order, nnmi);
if (has_attr(t_attr, "disabled", html_context->options))
goto see;
if (preselect == -1

View File

@ -304,8 +304,6 @@ add_document_line(struct plain_renderer *renderer,
} else if (line_char == ASCII_BS) {
if (!(expanded + line_pos)) {
/* We've backspaced to the start of the line */
if (expanded > 0)
expanded--; /* Don't count it */
continue;
}
@ -327,7 +325,7 @@ add_document_line(struct plain_renderer *renderer,
continue;
}
if (expanded - 2 >= 0) {
if ((expanded + line_pos) - 2 >= 0) {
/* Don't count the backspace character or the
* deleted character when returning the line's
* width or when expanding tabs. */

View File

@ -205,7 +205,6 @@ render_encoded_document(struct cache_entry *cached, struct document *document)
{
struct uri *uri = cached->uri;
enum stream_encoding encoding = ENCODING_NONE;
unsigned char *extension;
struct fragment *fragment = get_cache_fragment(cached);
struct string buffer = INIT_STRING("", 0);
@ -216,30 +215,37 @@ render_encoded_document(struct cache_entry *cached, struct document *document)
buffer.length = fragment->length;
}
extension = get_extension_from_uri(uri);
if (extension) {
encoding = guess_encoding(extension);
mem_free(extension);
}
if (uri->protocol != PROTOCOL_FILE) {
unsigned char *extension = get_extension_from_uri(uri);
if (encoding != ENCODING_NONE) {
int length = 0;
unsigned char *source;
if (extension) {
encoding = guess_encoding(extension);
mem_free(extension);
}
source = decode_encoded_buffer(encoding, buffer.source,
if (encoding != ENCODING_NONE) {
int length = 0;
unsigned char *source;
source = decode_encoded_buffer(encoding, buffer.source,
buffer.length, &length);
if (source) {
buffer.source = source;
buffer.length = length;
} else {
encoding = ENCODING_NONE;
if (source) {
buffer.source = source;
buffer.length = length;
} else {
encoding = ENCODING_NONE;
}
}
}
if (document->options.plain) {
#ifdef CONFIG_DOM
if (cached->content_type
&& !strlcasecmp("text/html", 9, cached->content_type, -1))
&& (!strlcasecmp("text/html", 9, cached->content_type, -1)
|| !strlcasecmp("application/rss+xml", 19, cached->content_type, -1)
|| !strlcasecmp("application/xbel+xml", 20, cached->content_type, -1)
|| !strlcasecmp("application/x-xbel", 18, cached->content_type, -1)
|| !strlcasecmp("application/xbel", 16, cached->content_type, -1)))
render_dom_document(cached, document, &buffer);
else
#endif

View File

@ -1,356 +0,0 @@
/* SGML node handling */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "document/dom/node.h"
#include "document/dom/stack.h"
#include "document/sgml/html/html.h"
#include "document/sgml/parser.h"
#include "document/sgml/scanner.h"
#include "document/sgml/sgml.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/lists.h"
#include "util/memory.h"
#include "util/string.h"
/* Functions for adding new nodes to the DOM tree */
/* Create a document node named after the string form of @uri and push it
 * on @stack. Returns the pushed node, or NULL if the node could not be
 * created or pushed. */
static inline struct dom_node *
add_sgml_document(struct dom_stack *stack, struct uri *uri)
{
	unsigned char *uristring = struri(uri);
	int urilen = strlen(uristring);
	struct dom_node *document;

	document = init_dom_node(DOM_NODE_DOCUMENT, uristring, urilen);
	if (!document)
		return NULL;

	return push_dom_node(stack, document);
}
/* Add an element node named by @token below the node on top of @stack and
 * push it. The SGML info looked up for the element is recorded both as the
 * element's type and in the parser state object of the new stack state.
 * Returns the new node, or NULL if it could not be added or pushed. */
static inline struct dom_node *
add_sgml_element(struct dom_stack *stack, struct scanner_token *token)
{
	struct sgml_parser *parser = stack->parser;
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_node *element = add_dom_element(parent, token->string, token->length);
	struct sgml_node_info *element_info;
	struct sgml_parser_state *parser_state;
	struct dom_stack_state *top;

	if (element == NULL)
		return NULL;

	element_info = get_sgml_node_info(parser->info->elements, element);
	element->data.element.type = element_info->type;

	if (push_dom_node(stack, element) == NULL)
		return NULL;

	/* The element just pushed is expected to be the new stack top. */
	top = get_dom_stack_top(stack);
	assert(element == top->node);

	parser_state = get_dom_stack_state_data(stack, top);
	parser_state->info = element_info;

	return element;
}
/* Add an attribute node named by @token to the node on top of @stack, then
 * push and immediately pop it so that the stack callbacks get to see it.
 *
 * @valtoken holds the attribute value and may be NULL for attributes
 * without a value. Nothing is done if the attribute node cannot be
 * created. */
static inline void
add_sgml_attribute(struct dom_stack *stack,
		   struct scanner_token *token, struct scanner_token *valtoken)
{
	struct sgml_parser *parser = stack->parser;
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	unsigned char *value = valtoken ? valtoken->string : NULL;
	uint16_t valuelen = valtoken ? valtoken->length : 0;
	struct sgml_node_info *info;
	struct dom_node *node;

	node = add_dom_attribute(parent, token->string, token->length,
				 value, valuelen);
	/* Bail out before the node is used for the info lookup and the
	 * attribute flags below; the check used to come after them. */
	if (!node) return;

	info = get_sgml_node_info(parser->info->attributes, node);

	node->data.attribute.type = info->type;
	node->data.attribute.id = !!(info->flags & SGML_ATTRIBUTE_IDENTIFIER);
	node->data.attribute.reference = !!(info->flags & SGML_ATTRIBUTE_REFERENCE);

	/* Remember whether the value was given as a quoted string. */
	if (valtoken && valtoken->type == SGML_TOKEN_STRING)
		node->data.attribute.quoted = 1;

	if (!push_dom_node(stack, node))
		return;

	pop_dom_node(stack);
}
/* Add a processing instruction node for @token below the node on top of
 * @stack and push it. The token text is split at the first space: what
 * precedes it becomes the target name and what follows it the instruction
 * value. XML processing instructions are left on the stack for the caller;
 * all others are popped again right away. Returns the new node or NULL. */
static inline struct dom_node *
add_sgml_proc_instruction(struct dom_stack *stack, struct scanner_token *token)
{
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	unsigned char *target = token->string;
	unsigned char *space = memchr(target, ' ', token->length);
	unsigned char *data = NULL;
	int targetlen = token->length;
	int datalen = 0;
	struct dom_node *instruction;

	if (space) {
		/* Leave the separating space out of both parts. */
		targetlen = space - target;
		data = space + 1;
		datalen = token->length - targetlen - 1;
	}

	instruction = add_dom_proc_instruction(parent, target, targetlen,
					       data, datalen);
	if (!instruction)
		return NULL;

	if (token->type == SGML_TOKEN_PROCESS_XML)
		instruction->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML;
	else
		instruction->data.proc_instruction.type = DOM_PROC_INSTRUCTION;

	if (!push_dom_node(stack, instruction))
		return NULL;

	if (token->type != SGML_TOKEN_PROCESS_XML)
		pop_dom_node(stack);

	return instruction;
}
/* Add a node of the given @type holding the text of @token below the node
 * on top of @stack, then push and immediately pop it so that the stack
 * callbacks get to see it. Nodes made from whitespace tokens are flagged as
 * containing only space. */
static inline void
add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct scanner_token *token)
{
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_node *child = add_dom_node(parent, type, token->string, token->length);

	if (child) {
		if (token->type == SGML_TOKEN_SPACE)
			child->data.text.only_space = 1;

		if (push_dom_node(stack, child) != NULL)
			pop_dom_node(stack);
	}
}

/* Convenience wrappers for the node types handled by add_sgml_node(). */
#define add_sgml_entityref(stack, t) add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, t)
#define add_sgml_text(stack, t) add_sgml_node(stack, DOM_NODE_TEXT, t)
#define add_sgml_comment(stack, t) add_sgml_node(stack, DOM_NODE_COMMENT, t)
/* Parse the attributes following an element begin token or an XML
 * processing instruction token, adding an attribute node for each attribute
 * name found. Returns when the end of the tag or the start of another
 * element is reached, or when the scanner runs out of tokens. */
static inline void
parse_sgml_attributes(struct dom_stack *stack, struct scanner *scanner)
{
struct scanner_token name;
assert(scanner_has_tokens(scanner)
&& (get_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
|| get_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML));
/* Skip the token that introduced the attribute list. */
skip_scanner_token(scanner);
while (scanner_has_tokens(scanner)) {
struct scanner_token *token = get_scanner_token(scanner);
assert(token);
switch (token->type) {
case SGML_TOKEN_TAG_END:
/* The tag end token is consumed; the element tokens below are
 * left for the caller to handle. */
skip_scanner_token(scanner);
/* and return */
case SGML_TOKEN_ELEMENT:
case SGML_TOKEN_ELEMENT_BEGIN:
case SGML_TOKEN_ELEMENT_END:
case SGML_TOKEN_ELEMENT_EMPTY_END:
return;
case SGML_TOKEN_IDENT:
/* Keep a copy of the name token, since fetching the next tokens
 * may change what @token refers to. */
copy_struct(&name, token);
/* Skip the attribute name token */
token = get_next_scanner_token(scanner);
if (token && token->type == '=') {
/* If the token is not a valid value token
 * ignore it. */
token = get_next_scanner_token(scanner);
if (token
&& token->type != SGML_TOKEN_IDENT
&& token->type != SGML_TOKEN_ATTRIBUTE
&& token->type != SGML_TOKEN_STRING)
token = NULL;
} else {
/* An attribute without any value. */
token = NULL;
}
add_sgml_attribute(stack, &name, token);
/* Skip the value token */
if (token)
skip_scanner_token(scanner);
break;
default:
/* Anything else inside the tag is ignored. */
skip_scanner_token(scanner);
}
}
}
/* The main parser loop: consume the tokens of @scanner one by one, adding
 * and pushing nodes on @stack for the markup found and popping them when
 * the matching end is seen. */
void
parse_sgml_document(struct dom_stack *stack, struct scanner *scanner)
{
while (scanner_has_tokens(scanner)) {
struct scanner_token *token = get_scanner_token(scanner);
switch (token->type) {
case SGML_TOKEN_ELEMENT:
case SGML_TOKEN_ELEMENT_BEGIN:
if (!add_sgml_element(stack, token)) {
/* The element could not be added, so skip it; for an
 * element with attributes that means skipping to the
 * end of its tag. */
if (token->type == SGML_TOKEN_ELEMENT) {
skip_scanner_token(scanner);
break;
}
skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
break;
}
if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
parse_sgml_attributes(stack, scanner);
} else {
skip_scanner_token(scanner);
}
break;
case SGML_TOKEN_ELEMENT_EMPTY_END:
pop_dom_node(stack);
skip_scanner_token(scanner);
break;
case SGML_TOKEN_ELEMENT_END:
if (!token->length) {
/* An end tag without a name pops the current node. */
pop_dom_node(stack);
} else {
struct dom_stack_state *state;
state = search_dom_stack(stack, DOM_NODE_ELEMENT,
token->string, token->length);
/* End tags that match no element on the stack are
 * ignored. */
if (state) {
struct sgml_parser_state *pstate;
pstate = get_dom_stack_state_data(stack, state);
/* Record the end token before popping so that the
 * pop callback can get at it. */
copy_struct(&pstate->end_token, token);
pop_dom_state(stack, DOM_NODE_ELEMENT, state);
}
}
skip_scanner_token(scanner);
break;
case SGML_TOKEN_NOTATION_COMMENT:
add_sgml_comment(stack, token);
skip_scanner_token(scanner);
break;
/* No nodes are created for the other notation tokens. */
case SGML_TOKEN_NOTATION_ATTLIST:
case SGML_TOKEN_NOTATION_DOCTYPE:
case SGML_TOKEN_NOTATION_ELEMENT:
case SGML_TOKEN_NOTATION_ENTITY:
case SGML_TOKEN_NOTATION:
skip_scanner_token(scanner);
break;
case SGML_TOKEN_PROCESS_XML:
if (!add_sgml_proc_instruction(stack, token)) {
skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
break;
}
/* XML processing instructions are left on the stack by
 * add_sgml_proc_instruction() so that their attributes can be
 * added before they are popped. */
parse_sgml_attributes(stack, scanner);
pop_dom_node(stack);
break;
case SGML_TOKEN_PROCESS:
add_sgml_proc_instruction(stack, token);
skip_scanner_token(scanner);
break;
case SGML_TOKEN_ENTITY:
add_sgml_entityref(stack, token);
skip_scanner_token(scanner);
break;
case SGML_TOKEN_SPACE:
case SGML_TOKEN_TEXT:
default:
/* Everything else is treated as text. */
add_sgml_text(stack, token);
skip_scanner_token(scanner);
}
}
}
/* Allocate and set up an SGML parser of the given @type for the document
 * at @uri.
 *
 * @renderer, @push_callbacks and @pop_callbacks are handed on to the
 * parser's DOM stack. The stack only keeps nodes when popping them if the
 * parser is not a stream parser. The document root node is created and
 * pushed on the stack before returning.
 *
 * Returns the new parser, to be released with done_sgml_parser(), or NULL
 * on failure. */
struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, void *renderer, struct uri *uri,
		 dom_stack_callback_T push_callbacks[DOM_NODES],
		 dom_stack_callback_T pop_callbacks[DOM_NODES])
{
	size_t obj_size = sizeof(struct sgml_parser_state);
	struct sgml_parser *parser;

	parser = mem_calloc(1, sizeof(*parser));
	if (!parser) return NULL;

	parser->type = type;
	parser->uri = get_uri_reference(uri);
	parser->info = &sgml_html_info;

	init_dom_stack(&parser->stack, parser, renderer,
		       push_callbacks, pop_callbacks, obj_size,
		       type != SGML_PARSER_STREAM);

	parser->root = add_sgml_document(&parser->stack, parser->uri);
	if (!parser->root) {
		/* Undo everything set up above, not just the parser
		 * allocation: the URI reference taken above and whatever
		 * the stack allocated while pushing the root node would
		 * otherwise be leaked. */
		done_dom_stack(&parser->stack);
		done_uri(parser->uri);
		mem_free(parser);
		return NULL;
	}

	return parser;
}
/* Release @parser together with its URI reference and its DOM stack. */
void
done_sgml_parser(struct sgml_parser *parser)
{
	struct uri *uri = parser->uri;

	done_uri(uri);
	done_dom_stack(&parser->stack);
	mem_free(parser);
}
/* FIXME: Make it possible to push variable number of strings (even nested
 * while parsing another string) so that we can feed back output of stuff
 * like ECMAScripts document.write(). */
/* Parse the source held in @buffer with @parser and return the root node
 * of the parser. */
struct dom_node *
parse_sgml(struct sgml_parser *parser, struct string *buffer)
{
	unsigned char *begin = buffer->source;

	init_scanner(&parser->scanner, &sgml_scanner_info,
		     begin, begin + buffer->length);

	/* FIXME: Make parse_sgml_document() return an error code. */
	parse_sgml_document(&parser->stack, &parser->scanner);

	return parser->root;
}

View File

@ -1,53 +0,0 @@
#ifndef EL__DOCUMENT_SGML_PARSER_H
#define EL__DOCUMENT_SGML_PARSER_H
#include "document/dom/node.h"
#include "document/dom/stack.h"
#include "document/sgml/sgml.h"
#include "util/scanner.h"
struct string;
struct uri;
/* The modes the SGML parser can work in. */
enum sgml_parser_type {
/* The first one is a DOM tree builder. */
SGML_PARSER_TREE,
/* The second one will simply push nodes on the stack, not building a
 * DOM tree. This interface is similar to that of SAX (Simple API for
 * XML) where events are fired when nodes are entered and exited. It is
 * useful when you are not actually interested in the DOM tree, but can
 * do all processing in a stream-like manner, such as when highlighting
 * HTML code. */
SGML_PARSER_STREAM,
};
/* The state of an SGML parser. It is created by init_sgml_parser() and
 * released by done_sgml_parser(). */
struct sgml_parser {
/* Whether the parser builds a tree or works as a stream parser. */
enum sgml_parser_type type;
/* Info about the elements and attributes of the document type. */
struct sgml_info *info;
/* The URI of the document; the parser holds a reference to it. */
struct uri *uri;
/* The document root node. */
struct dom_node *root;
/* The scanner turning the source into tokens. */
struct scanner scanner;
/* The stack of nodes currently being parsed. */
struct dom_stack stack;
};
/* The parser specific object kept for each state of the parser's DOM
 * stack. */
struct sgml_parser_state {
/* The SGML info that was looked up for the element of the state. */
struct sgml_node_info *info;
/* This is used by the DOM source renderer for highlighting the
 * end-tag of an element. */
struct scanner_token end_token;
};
/* Create a parser of the given @type for the document at @uri. The
 * @renderer and the callback tables are handed on to the DOM stack of the
 * parser. Returns NULL on failure. */
struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, void *renderer, struct uri *uri,
dom_stack_callback_T push_callbacks[DOM_NODES],
dom_stack_callback_T pop_callbacks[DOM_NODES]);
/* Release the parser and what it holds on to. */
void done_sgml_parser(struct sgml_parser *parser);
/* Parse the source in @buffer and return the root node of the parser. */
struct dom_node *parse_sgml(struct sgml_parser *parser, struct string *buffer);
#endif

View File

@ -1,408 +0,0 @@
/* SGML token scanner utilities */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "document/sgml/scanner.h"
#include "util/error.h"
#include "util/scanner.h"
#include "util/string.h"
/* Bitmap entries for the SGML character groups used in the scanner table */
/* The SGML tokenizer maintains a state that can be either text or element
 * state. The state only has meaning while doing the actual scanning and is
 * not accessible at the parsing time. */
enum sgml_scanner_state {
/* Scanning text between tags. */
SGML_STATE_TEXT,
/* Scanning inside a tag, e.g. after an element name that was not
 * directly followed by '>'. */
SGML_STATE_ELEMENT,
};
/* Bit flags for the character groups used in the scanner table. A
 * character can be a member of several groups. */
enum sgml_char_group {
SGML_CHAR_ENTITY = (1 << 1),
SGML_CHAR_IDENT = (1 << 2),
SGML_CHAR_NEWLINE = (1 << 3),
SGML_CHAR_WHITESPACE = (1 << 4),
SGML_CHAR_NOT_TEXT = (1 << 5),
SGML_CHAR_NOT_ATTRIBUTE = (1 << 6),
};
/* Describes which characters belong to which of the character groups in
 * enum sgml_char_group. */
static struct scan_table_info sgml_scan_table_info[] = {
SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
/* For the octal number impaired (me included) \241 is 161 --jonas */
SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
SCAN_TABLE_STRING("\f\n\r", SGML_CHAR_NEWLINE),
SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
SCAN_TABLE_END,
};
/* Maps the identifiers found after '<!' and '<?' to token types. */
/* NOTE(review): each entry looks like { identifier, token type, base token
 * type the mapping applies to } -- confirm against the definition of struct
 * scanner_string_mapping. */
static struct scanner_string_mapping sgml_string_mappings[] = {
{ "--", SGML_TOKEN_NOTATION_COMMENT, SGML_TOKEN_NOTATION },
{ "ATTLIST", SGML_TOKEN_NOTATION_ATTLIST, SGML_TOKEN_NOTATION },
{ "DOCTYPE", SGML_TOKEN_NOTATION_DOCTYPE, SGML_TOKEN_NOTATION },
{ "ELEMENT", SGML_TOKEN_NOTATION_ELEMENT, SGML_TOKEN_NOTATION },
{ "ENTITY", SGML_TOKEN_NOTATION_ENTITY, SGML_TOKEN_NOTATION },
{ "xml", SGML_TOKEN_PROCESS_XML, SGML_TOKEN_PROCESS },
{ NULL, SGML_TOKEN_NONE, SGML_TOKEN_NONE },
};
static struct scanner_token *scan_sgml_tokens(struct scanner *scanner);
/* Ties the SGML scanner together: the string mappings, the scan table
 * description and the function doing the scanning. */
struct scanner_info sgml_scanner_info = {
sgml_string_mappings,
sgml_scan_table_info,
scan_sgml_tokens,
};
/* Check whether the character @c is in any of the groups given by @bit. */
#define check_sgml_table(c, bit) (sgml_scanner_info.scan_table[(c)] & (bit))
/* Advance @s for as long as it points at characters in the group(s) given
 * by @bit, without going beyond the end of the scanned string. */
#define scan_sgml(scanner, s, bit) \
while ((s) < (scanner)->end && check_sgml_table(*(s), bit)) (s)++;
#define is_sgml_ident(c) check_sgml_table(c, SGML_CHAR_IDENT)
#define is_sgml_entity(c) check_sgml_table(c, SGML_CHAR_ENTITY)
#define is_sgml_space(c) check_sgml_table(c, SGML_CHAR_WHITESPACE)
#define is_sgml_text(c) !check_sgml_table(c, SGML_CHAR_NOT_TEXT)
/* NOTE(review): SGML_CHAR_TOKEN_START is not a member of the enum
 * sgml_char_group defined in this file and nothing in the part of the file
 * seen here uses this macro -- confirm whether it can be removed. */
#define is_sgml_token_start(c) check_sgml_table(c, SGML_CHAR_TOKEN_START)
#define is_sgml_attribute(c) !check_sgml_table(c, SGML_CHAR_NOT_ATTRIBUTE | SGML_CHAR_WHITESPACE)
/* Text token scanning */
/* Advance @str to the next '<' or '&', or to the end of the scanned string
 * if there is none. The loop body is run for each character passed. */
/* I think it is faster to not check the table here --jonas */
#define foreach_sgml_cdata(scanner, str) \
for (; ((str) < (scanner)->end && *(str) != '<' && *(str) != '&'); (str)++)
/* Scan one token starting at the current scanner position while in text
 * state and store it in @token. The result is either an entity reference,
 * whitespace, text or, for a '&' that does not start an entity reference,
 * garbage. The scanner position is moved past what was scanned. */
static inline void
scan_sgml_text_token(struct scanner *scanner, struct scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum sgml_token_type type = SGML_TOKEN_GARBAGE;
/* Set when the token string should be shorter than what is skipped. */
int real_length = -1;
/* In scan_sgml_tokens() we check that first_char != '<' */
assert(first_char != '<' && scanner->state == SGML_STATE_TEXT);
token->string = string++;
if (first_char == '&') {
if (is_sgml_entity(*string)) {
scan_sgml(scanner, string, SGML_CHAR_ENTITY);
type = SGML_TOKEN_ENTITY;
/* Leave the '&' out of the token string. */
token->string++;
real_length = string - token->string;
}
/* Skip to right after the terminating ';', but not beyond the
 * start of the next tag or entity reference. */
foreach_sgml_cdata (scanner, string) {
if (*string == ';') {
string++;
break;
}
}
} else {
if (is_sgml_space(first_char)) {
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
/* Leading whitespace followed by text makes the whole token a
 * text token. */
type = string < scanner->end && is_sgml_text(*string)
? SGML_TOKEN_TEXT : SGML_TOKEN_SPACE;
} else {
type = SGML_TOKEN_TEXT;
}
/* The token stretches to the next '<' or '&'. */
foreach_sgml_cdata (scanner, string) {
/* m33p */;
}
}
token->type = type;
token->length = real_length >= 0 ? real_length : string - token->string;
token->precedence = get_sgml_precedence(type);
scanner->position = string;
}
/* Element scanning */
/* Tell whether a token of the given @type may be skipped while looking for
 * @skipto, which is the case as long as its precedence is not higher than
 * that of @skipto. */
static inline int
check_sgml_precedence(int type, int skipto)
{
	return !(get_sgml_precedence(type) > get_sgml_precedence(skipto));
}
/* XXX: Only element or ``in tag'' precedence is handled correctly however
 * using this function for CDATA or text would be overkill. */
/* Search from *@string for the character @skipto.
 *
 * If it is found, *@string is set to point right after it and a pointer to
 * the character itself is returned. Otherwise *@string is set to where the
 * search stopped, which is either the end of the scanned string or a
 * character of higher precedence than @skipto, and NULL is returned.
 *
 * If @check_quoting is set, anything between a quote character and the
 * next occurrence of that same quote character is skipped. */
static inline unsigned char *
skip_sgml(struct scanner *scanner, unsigned char **string, unsigned char skipto,
	  int check_quoting)
{
	unsigned char *pos = *string;

	for (; pos < scanner->end; pos++) {
		if (*pos == skipto) {
			*string = pos + 1;
			return pos;
		}

		if (!check_sgml_precedence(*pos, skipto))
			break;

		if (check_quoting && isquote(*pos)) {
			/* The search starts after the opening quote, so only
			 * end - pos - 1 bytes are left to look at. Using
			 * end - pos would let memchr() read the byte at
			 * scanner->end. */
			int length = scanner->end - pos - 1;
			unsigned char *end = memchr(pos + 1, *pos, length);

			/* Continue after the closing quote, if there is
			 * one. */
			if (end) pos = end;
		}
	}

	*string = pos;
	return NULL;
}
/* Search from *@string, which should point right after '<!--', for the
 * '-->' that ends the comment.
 *
 * If it is found, *@string is set to point right after it and the length
 * of the comment text in front of it is returned. If it is not found, zero
 * is returned and *@string is left pointing at the last bytes of the
 * scanned string, which are too few to hold a terminator. */
static inline int
skip_comment(struct scanner *scanner, unsigned char **string)
{
	unsigned char *pos = *string;
	int length = 0;

	/* The terminator is three bytes long, so the last place it can start
	 * is at end - 3. The former condition, pos < end - 3, left that
	 * position out and thereby missed a terminator at the very end of
	 * the scanned string. Comparing the remaining length also avoids
	 * forming a pointer in front of the start of a very short string. */
	for (; scanner->end - pos >= 3; pos++) {
		if (pos[0] == '-' && pos[1] == '-' && pos[2] == '>') {
			length = pos - *string;
			pos += 3;
			break;
		}
	}

	*string = pos;
	return length;
}
/* Advance @str past an unquoted attribute value, that is until whitespace,
 * one of '<', '=' and '>', or the end of the scanned string is reached. */
#define scan_sgml_attribute(scanner, str) \
while ((str) < (scanner)->end && is_sgml_attribute(*(str))) \
(str)++;
/* Scan one token of markup starting at the current scanner position and
 * store it in @token. scan_sgml_tokens() calls this when the scanner is in
 * the SGML_STATE_ELEMENT state or when the current character is '<'.
 *
 * Fills in the string, length, type and precedence of @token, updates
 * scanner->position to the first unconsumed character and may switch
 * scanner->state between SGML_STATE_ELEMENT and SGML_STATE_TEXT. The token
 * type stays SGML_TOKEN_GARBAGE when nothing below recognizes the input. */
static inline void
scan_sgml_element_token(struct scanner *scanner, struct scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum sgml_token_type type = SGML_TOKEN_GARBAGE;
/* A negative value means "derive the length from how far @string moved". */
int real_length = -1;
/* The token starts at the first char; @string now points at the second. */
token->string = string++;
if (first_char == '<') {
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
if (scanner->state == SGML_STATE_ELEMENT) {
/* Already inside an element so insert a tag end token
 * and continue scanning in next iteration. */
/* NOTE(review): string-- undoes a single character only. If
 * whitespace was skipped after the '<' the position does not
 * return to the '<' itself -- confirm this is intended. */
string--;
real_length = 0;
type = SGML_TOKEN_TAG_END;
scanner->state = SGML_STATE_TEXT;
} else if (is_sgml_ident(*string)) {
/* Start tag: the token covers only the element name. */
token->string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
if (*string == '>') {
/* No attributes: the whole tag is consumed at once. */
type = SGML_TOKEN_ELEMENT;
string++;
} else {
/* Something follows the name, so the rest of the tag is
 * scanned token by token in the element state. */
scanner->state = SGML_STATE_ELEMENT;
type = SGML_TOKEN_ELEMENT_BEGIN;
}
} else if (*string == '!') {
/* "<!" starts either a comment or some other notation. */
unsigned char *ident;
enum sgml_token_type base = SGML_TOKEN_NOTATION;
string++;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
token->string = ident = string;
if (string + 1 < scanner->end
&& string[0] == '-' && string[1] == '-') {
/* Comment: the token holds the text between "--" and "-->". */
string += 2;
type = SGML_TOKEN_NOTATION_COMMENT;
token->string = string;
real_length = skip_comment(scanner, &string);
assert(real_length >= 0);
} else {
/* Map the notation identifier to a token type, using @base when
 * it is not a known one, and skip to the closing '>'. */
scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = map_scanner_string(scanner, ident, string, base);
skip_sgml(scanner, &string, '>', 0);
}
} else if (*string == '?') {
/* "<?" starts a processing instruction. */
unsigned char *pos;
enum sgml_token_type base = SGML_TOKEN_PROCESS;
string++;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
token->string = pos = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = map_scanner_string(scanner, pos, string, base);
/* Figure out where the processing instruction ends */
/* skip_sgml() leaves @pos just after each '>' it finds, so pos[-2]
 * is the character right before that '>'. */
for (pos = string; skip_sgml(scanner, &pos, '>', 0); ) {
if (pos[-2] != '?') continue;
/* Set length until '?' char and move position
 * beyond '>'. */
real_length = pos - token->string - 2;
break;
}
switch (type) {
case SGML_TOKEN_PROCESS_XML:
/* We want to parse the attributes */
/* @string stays right after the target name so that the
 * attributes are scanned in the element state. */
assert(scanner->state != SGML_STATE_ELEMENT);
scanner->state = SGML_STATE_ELEMENT;
break;
default:
/* Just skip the whole thing */
string = pos;
}
} else if (*string == '/') {
/* "</" starts an end tag. */
string++;
scan_sgml(scanner, string, SGML_CHAR_WHITESPACE);
if (is_sgml_ident(*string)) {
/* Named end tag: the token covers the name only and the rest up
 * to and including '>' is skipped (honouring quotes). */
token->string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string;
type = SGML_TOKEN_ELEMENT_END;
skip_sgml(scanner, &string, '>', 1);
} else if (*string == '>') {
/* "</>" yields an end tag token with an empty name. */
string++;
real_length = 0;
type = SGML_TOKEN_ELEMENT_END;
}
/* Only leave the element state if an end tag was recognized. */
if (type != SGML_TOKEN_GARBAGE)
scanner->state = SGML_STATE_TEXT;
} else {
/* Alien < > stuff so ignore it */
skip_sgml(scanner, &string, '>', 0);
}
} else if (first_char == '=') {
/* The char value itself is used as the token type. */
type = '=';
} else if (first_char == '?' || first_char == '>') {
/* End of the tag; for '?' everything up to and including the next
 * '>' is consumed as well. */
if (first_char == '?') {
skip_sgml(scanner, &string, '>', 0);
}
type = SGML_TOKEN_TAG_END;
assert(scanner->state == SGML_STATE_ELEMENT);
scanner->state = SGML_STATE_TEXT;
} else if (first_char == '/') {
if (*string == '>') {
/* "/>" closes an empty element. */
string++;
real_length = 0;
type = SGML_TOKEN_ELEMENT_EMPTY_END;
assert(scanner->state == SGML_STATE_ELEMENT);
scanner->state = SGML_STATE_TEXT;
} else if (is_sgml_attribute(*string)) {
/* A '/' followed by attribute chars is scanned as an attribute. */
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
}
} else if (isquote(first_char)) {
/* Look for the matching closing quote in the rest of the buffer. */
int size = scanner->end - string;
unsigned char *string_end = memchr(string, first_char, size);
if (string_end) {
/* We don't want the delimiters in the token */
token->string++;
real_length = string_end - token->string;
string = string_end + 1;
type = SGML_TOKEN_STRING;
} else if (is_sgml_attribute(*string)) {
/* No closing quote: drop the opening quote and scan what follows
 * as an attribute. */
token->string++;
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
}
} else if (is_sgml_attribute(first_char)) {
/* Starts out as an identifier when the first char allows it ... */
if (is_sgml_ident(first_char)) {
scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = SGML_TOKEN_IDENT;
}
/* ... but becomes an attribute token if attribute chars follow. */
if (is_sgml_attribute(*string)) {
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
}
}
token->type = type;
/* Use the explicit length when one was set, else the scanned distance. */
token->length = real_length >= 0 ? real_length : string - token->string;
token->precedence = get_sgml_precedence(type);
scanner->position = string;
}
/* Scanner multiplexor */

/* Fill the scanner's token table, handing each token either to the element
 * scanner (when inside a tag or at a '<') or to the text scanner. */
static struct scanner_token *
scan_sgml_tokens(struct scanner *scanner)
{
	struct scanner_token *limit = scanner->table + SCANNER_TOKENS;
	struct scanner_token *slot;

	if (!begin_token_scanning(scanner))
		return get_scanner_token(scanner);

	/* Keep going until the table is full or the input is used up */
	slot = scanner->table + scanner->tokens;
	while (slot < limit && scanner->position < scanner->end) {
		int in_markup = scanner->state == SGML_STATE_ELEMENT
				|| *scanner->position == '<';

		if (!in_markup) {
			scan_sgml_text_token(scanner, slot);
			slot++;
			continue;
		}

		scan_sgml(scanner, scanner->position, SGML_CHAR_WHITESPACE);
		if (scanner->position >= scanner->end)
			break;

		scan_sgml_element_token(scanner, slot);

		/* A token marked to be skipped is not kept: its slot is
		 * simply reused for the next token. */
		if (slot->type != SGML_TOKEN_SKIP)
			slot++;
	}

	return end_token_scanning(scanner, slot);
}

View File

@ -1,31 +0,0 @@
/* SGML generics */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "document/dom/node.h"
#include "document/sgml/sgml.h"
#include "util/error.h"
#include "util/string.h"
/* Comparison callback ordering a DOM node (@key_) against an SGML node info
 * entry (@node_). The strings are compared case-insensitively over their
 * common length; if that part is equal the length difference decides. */
int
sgml_info_strcmp(const void *key_, const void *node_)
{
	struct dom_node *key = (struct dom_node *) key_;
	struct sgml_node_info *info = (struct sgml_node_info *) node_;
	int common = int_min(key->length, info->length);
	int diff = strncasecmp(key->string, info->string, common);

	/* A difference within the common prefix settles it. */
	if (diff)
		return diff;

	/* Otherwise the longer string is the bigger one (zero if equal). */
	return key->length - info->length;
}

2
src/dom/.vimrc Normal file
View File

@ -0,0 +1,2 @@
:set runtimepath+=.
:runtime ../../.vimrc

12
src/dom/Makefile Normal file
View File

@ -0,0 +1,12 @@
top_builddir=../..
include $(top_builddir)/Makefile.config

SUBDIRS = css sgml
OBJS = node.o select.o stack.o scanner.o

# The test programs are only built in debug builds.
SUBDIRS-$(CONFIG_DEBUG) += test

# `test' is also the name of a subdirectory, so declare the target phony to
# keep make from ever treating the directory as the up-to-date target.
.PHONY: test

# Build everything here first, then run the test suite in test/.
# Use $(MAKE) rather than a literal `make' so that flags such as -j and -n
# (and the jobserver) are passed on to the sub-make.
test: all
	$(MAKE) test -C test

include $(top_srcdir)/Makefile.lib

View File

@ -1,8 +1,6 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
INCLUDES += $(SEE_CFLAGS)
OBJS = see.o core.o hooks.o interface.o
OBJS = scanner.o
include $(top_srcdir)/Makefile.lib

388
src/dom/css/scanner.c Normal file
View File

@ -0,0 +1,388 @@
/* CSS token scanner utilities */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "dom/css/scanner.h"
#include "dom/scanner.h"
#include "dom/string.h"
#include "util/error.h"
/* Character classes known to the CSS scanner. Every class occupies its own
 * bit so that the scanner table can record several classes for a single
 * character. */
enum css_char_group {
	CSS_CHAR_ALPHA		= 0x001,
	CSS_CHAR_DIGIT		= 0x002,
	CSS_CHAR_HEX_DIGIT	= 0x004,
	CSS_CHAR_IDENT		= 0x008,
	CSS_CHAR_IDENT_START	= 0x010,
	CSS_CHAR_NEWLINE	= 0x020,
	CSS_CHAR_NON_ASCII	= 0x040,
	CSS_CHAR_SGML_MARKUP	= 0x080,
	CSS_CHAR_TOKEN		= 0x100,
	CSS_CHAR_TOKEN_START	= 0x200,
	CSS_CHAR_WHITESPACE	= 0x400,
};
/* Description of which character groups each character belongs to. Entries
 * either name a character range or list the individual characters in a
 * string, together with the css_char_group bits that apply to them. */
static const struct dom_scan_table_info css_scan_table_info[] = {
DOM_SCAN_TABLE_RANGE("0", '9', CSS_CHAR_DIGIT | CSS_CHAR_HEX_DIGIT | CSS_CHAR_IDENT),
DOM_SCAN_TABLE_RANGE("A", 'F', CSS_CHAR_HEX_DIGIT),
DOM_SCAN_TABLE_RANGE("A", 'Z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
DOM_SCAN_TABLE_RANGE("a", 'f', CSS_CHAR_HEX_DIGIT),
DOM_SCAN_TABLE_RANGE("a", 'z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* For the octal number impaired (me included): \241 is 161 --jonas */
DOM_SCAN_TABLE_RANGE("\241", 255, CSS_CHAR_NON_ASCII | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v\000", CSS_CHAR_WHITESPACE),
DOM_SCAN_TABLE_STRING("\f\n\r", CSS_CHAR_NEWLINE),
/* '-' may occur inside an identifier but is not marked as a valid
 * identifier start. */
DOM_SCAN_TABLE_STRING("-", CSS_CHAR_IDENT),
/* Characters that begin a token needing dedicated handling in
 * scan_css_token(). */
DOM_SCAN_TABLE_STRING(".#@!\"'<-/|^$*", CSS_CHAR_TOKEN_START),
/* Unicode escape (that we do not handle yet) + other special chars */
DOM_SCAN_TABLE_STRING("\\_", CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),
/* This should contain the most commonly used char tokens like ':' and
 * maybe a few garbage chars that people might put in their CSS code */
DOM_SCAN_TABLE_STRING("[({})];:,.>+~", CSS_CHAR_TOKEN),
/* Characters making up the SGML "<!--", "-->", "<![CDATA[" and "]]>"
 * markup that scan_css_token() skips. */
DOM_SCAN_TABLE_STRING("<![CDATA]->", CSS_CHAR_SGML_MARKUP),
DOM_SCAN_TABLE_END,
};
/* Build one string mapping entry: the string @str, the token type
 * CSS_TOKEN_<type> it maps to, and the more generic token type
 * CSS_TOKEN_<family> it belongs to. */
#define CSS_STRING_MAP(str, type, family) \
{ INIT_DOM_STRING(str, -1), CSS_TOKEN_##type, CSS_TOKEN_##family }
/* Known strings and the specific token types they map to, grouped by
 * family: dimension units, functions and @-rule keywords. */
static const struct dom_scanner_string_mapping css_string_mappings[] = {
/* Units following a number */
CSS_STRING_MAP("Hz", FREQUENCY, DIMENSION),
CSS_STRING_MAP("cm", LENGTH, DIMENSION),
CSS_STRING_MAP("deg", ANGLE, DIMENSION),
CSS_STRING_MAP("em", EM, DIMENSION),
CSS_STRING_MAP("ex", EX, DIMENSION),
CSS_STRING_MAP("grad", ANGLE, DIMENSION),
CSS_STRING_MAP("in", LENGTH, DIMENSION),
CSS_STRING_MAP("kHz", FREQUENCY, DIMENSION),
CSS_STRING_MAP("mm", LENGTH, DIMENSION),
CSS_STRING_MAP("ms", TIME, DIMENSION),
CSS_STRING_MAP("pc", LENGTH, DIMENSION),
CSS_STRING_MAP("pt", LENGTH, DIMENSION),
CSS_STRING_MAP("px", LENGTH, DIMENSION),
CSS_STRING_MAP("rad", ANGLE, DIMENSION),
CSS_STRING_MAP("s", TIME, DIMENSION),
/* Function names (the identifier before a '(') */
CSS_STRING_MAP("rgb", RGB, FUNCTION),
CSS_STRING_MAP("url", URL, FUNCTION),
/* @-rule keywords (the identifier after the '@') */
CSS_STRING_MAP("charset", AT_CHARSET, AT_KEYWORD),
CSS_STRING_MAP("font-face", AT_FONT_FACE, AT_KEYWORD),
CSS_STRING_MAP("import", AT_IMPORT, AT_KEYWORD),
CSS_STRING_MAP("media", AT_MEDIA, AT_KEYWORD),
CSS_STRING_MAP("page", AT_PAGE, AT_KEYWORD),
DOM_STRING_MAP_END,
};
/* Declared up front because the scanner info below refers to it before its
 * definition. */
static struct dom_scanner_token *scan_css_tokens(struct dom_scanner *scanner);
/* The CSS scanner description: the string mappings, the character group
 * table description and the function that scans tokens, in that order.
 * NOTE(review): the member names of struct dom_scanner_info are not visible
 * here; check_css_table() reads a scan_table member of this object. */
struct dom_scanner_info dom_css_scanner_info = {
css_string_mappings,
css_scan_table_info,
scan_css_tokens,
};
/* Non-zero if the character @c belongs to any of the groups in @bit. */
#define check_css_table(c, bit) (dom_css_scanner_info.scan_table[(c)] & (bit))
/* Advance @s while it is before the end of the scanner buffer and points at
 * a character from the groups in @bit. The expansion is a bare while
 * statement that already ends with a semicolon. */
#define scan_css(scanner, s, bit) \
while ((s) < (scanner)->end && check_css_table(*(s), bit)) (s)++;
/* Move @s backwards while it is not before the start of the scanner string
 * and points at a character from the groups in @bit. */
#define scan_back_css(scanner, s, bit) \
while ((s) >= (scanner)->string && check_css_table(*(s), bit)) (s)--;
/* Shorthands for testing a single character against one group. */
#define is_css_ident_start(c) check_css_table(c, CSS_CHAR_IDENT_START)
#define is_css_ident(c) check_css_table(c, CSS_CHAR_IDENT)
#define is_css_digit(c) check_css_table(c, CSS_CHAR_DIGIT)
#define is_css_hexdigit(c) check_css_table(c, CSS_CHAR_HEX_DIGIT)
#define is_css_char_token(c) check_css_table(c, CSS_CHAR_TOKEN)
#define is_css_token_start(c) check_css_table(c, CSS_CHAR_TOKEN_START)
/* Advance @s until it points at the character @skipto, reaches the end of
 * the scanner buffer, or meets a character that check_css_precedence() does
 * not allow to be skipped. A quoted section is jumped over as a whole when
 * its closing quote exists, so a @skipto character inside quotes is ignored.
 *
 * The search for the closing quote starts one past the opening quote, hence
 * only end - s - 1 bytes may be examined; using end - s would read one byte
 * past the end of the buffer. */
#define skip_css(scanner, s, skipto)						\
	while ((s) < (scanner)->end						\
	       && *(s) != (skipto)						\
	       && check_css_precedence(*(s), skipto)) {				\
		if (isquote(*(s))) {						\
			int size = (scanner)->end - (s) - 1;			\
			unsigned char *end = memchr((s) + 1, *(s), size);	\
										\
			if (end) (s) = end;					\
		}								\
		(s)++;								\
	}
static inline void
scan_css_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum css_token_type type = CSS_TOKEN_GARBAGE;
int real_length = -1;
assert(first_char);
token->string.string = string++;
if (is_css_char_token(first_char)) {
type = first_char;
} else if (is_css_digit(first_char) || first_char == '.') {
scan_css(scanner, string, CSS_CHAR_DIGIT);
/* First scan the full number token */
if (*string == '.') {
string++;
if (is_css_digit(*string)) {
type = CSS_TOKEN_NUMBER;
scan_css(scanner, string, CSS_CHAR_DIGIT);
}
}
/* Check what kind of number we have */
if (*string == '%') {
if (first_char != '.')
type = CSS_TOKEN_PERCENTAGE;
string++;
} else if (!is_css_ident_start(*string)) {
type = CSS_TOKEN_NUMBER;
} else {
unsigned char *ident = string;
scan_css(scanner, string, CSS_CHAR_IDENT);
type = map_dom_scanner_string(scanner, ident, string,
CSS_TOKEN_DIMENSION);
}
} else if (is_css_ident_start(first_char)) {
scan_css(scanner, string, CSS_CHAR_IDENT);
if (*string == '(') {
unsigned char *function_end = string + 1;
/* Make sure that we have an ending ')' */
skip_css(scanner, function_end, ')');
if (*function_end == ')') {
type = map_dom_scanner_string(scanner, token->string.string,
string, CSS_TOKEN_FUNCTION);
/* If it is not a known function just skip the
* how arg stuff so we don't end up generating
* a lot of useless tokens. */
if (type == CSS_TOKEN_FUNCTION) {
string = function_end;
} else if (type == CSS_TOKEN_URL) {
/* Extracting the URL first removes any
* leading or ending whitespace and
* then see if the url is given in a
* string. If that is the case the
* string delimiters are also trimmed.
* This is not totally correct because
* we should of course handle escape
* sequences .. but that will have to
* be fixed later. */
unsigned char *from = string + 1;
unsigned char *to = function_end - 1;
scan_css(scanner, from, CSS_CHAR_WHITESPACE);
scan_back_css(scanner, to, CSS_CHAR_WHITESPACE);
if (isquote(*from)) from++;
if (isquote(*to)) to--;
token->string.string = from;
real_length = to - from + 1;
assert(real_length >= 0);
string = function_end;
}
assert(type != CSS_TOKEN_RGB || *string == '(');
assert(type != CSS_TOKEN_URL || *string == ')');
assert(type != CSS_TOKEN_FUNCTION || *string == ')');
}
string++;
} else {
type = CSS_TOKEN_IDENT;
}
} else if (!is_css_token_start(first_char)) {
/* TODO: Better composing of error tokens. For now we just
* split them down into char tokens */
} else if (first_char == '#') {
/* Check whether it is hexcolor or hash token */
if (is_css_hexdigit(*string)) {
int hexdigits;
scan_css(scanner, string, CSS_CHAR_HEX_DIGIT);
/* Check that the hexdigit sequence is either 3 or 6
* chars and it isn't just start of some non-hex ident
* string. */
hexdigits = string - token->string.string - 1;
if ((hexdigits == 3 || hexdigits == 6)
&& !is_css_ident(*string)) {
type = CSS_TOKEN_HEX_COLOR;
} else {
scan_css(scanner, string, CSS_CHAR_IDENT);
type = CSS_TOKEN_HASH;
}
} else if (is_css_ident(*string)) {
/* Not *_ident_start() because hashes are #<name>. */
scan_css(scanner, string, CSS_CHAR_IDENT);
type = CSS_TOKEN_HASH;
}
} else if (first_char == '@') {
/* Compose token containing @<ident> */
if (is_css_ident_start(*string)) {
unsigned char *ident = string;
/* Scan both ident start and ident */
scan_css(scanner, string, CSS_CHAR_IDENT);
type = map_dom_scanner_string(scanner, ident, string,
CSS_TOKEN_AT_KEYWORD);
}
} else if (first_char == '*') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_CONTAINS;
string++;
} else {
type = CSS_TOKEN_IDENT;
}
} else if (first_char == '^') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_BEGIN;
string++;
}
} else if (first_char == '$') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_END;
string++;
}
} else if (first_char == '|') {
if (*string == '=') {
type = CSS_TOKEN_SELECT_HYPHEN_LIST;
string++;
}
} else if (first_char == '!') {
scan_css(scanner, string, CSS_CHAR_WHITESPACE);
if (!strncasecmp(string, "important", 9)) {
type = CSS_TOKEN_IMPORTANT;
string += 9;
}
} else if (isquote(first_char)) {
/* TODO: Escaped delimiters --jonas */
int size = scanner->end - string;
unsigned char *string_end = memchr(string, first_char, size);
if (string_end) {
/* We don't want the delimiters in the token */
token->string.string++;
real_length = string_end - token->string.string;
string = string_end + 1;
type = CSS_TOKEN_STRING;
}
} else if (first_char == '<' || first_char == '-') {
/* Try to navigate SGML tagsoup */
if (*string == '/') {
/* Some kind of SGML tag end ... better bail out screaming */
type = CSS_TOKEN_NONE;
} else {
unsigned char *sgml = string;
/* Skip anything looking like SGML "<!--" and "-->"
* comments + <![CDATA[ and ]]> notations. */
scan_css(scanner, sgml, CSS_CHAR_SGML_MARKUP);
if (sgml - string >= 2
&& ((first_char == '<' && *string == '!')
|| (first_char == '-' && sgml[-1] == '>'))) {
type = CSS_TOKEN_SKIP;
string = sgml;
}
}
} else if (first_char == '/') {
/* Comments */
if (*string == '*') {
type = CSS_TOKEN_SKIP;
for (string++; string < scanner->end; string++)
if (*string == '*' && string[1] == '/') {
string += 2;
break;
}
}
} else {
INTERNAL("Someone forgot to put code for recognizing tokens "
"which start with '%c'.", first_char);
}
token->type = type;
token->string.length = real_length > 0 ? real_length : string - token->string.string;
token->precedence = get_css_precedence(type);
scanner->position = string;
}
/* Fill the scanner's token table with CSS tokens. Whitespace between tokens
 * is dropped, tokens marked to be skipped are not recorded and a token of
 * type CSS_TOKEN_NONE ends the scanning for good. */
static struct dom_scanner_token *
scan_css_tokens(struct dom_scanner *scanner)
{
	struct dom_scanner_token *limit = scanner->table + DOM_SCANNER_TOKENS;
	struct dom_scanner_token *slot;

	if (!begin_dom_token_scanning(scanner))
		return get_dom_scanner_token(scanner);

	/* Keep going until the table is full or the input is used up */
	slot = scanner->table + scanner->tokens;
	while (slot < limit && scanner->position < scanner->end) {
		scan_css(scanner, scanner->position, CSS_CHAR_WHITESPACE);
		if (scanner->position >= scanner->end)
			break;

		scan_css_token(scanner, slot);

		/* The token scanner asked us to stop entirely */
		if (slot->type == CSS_TOKEN_NONE) {
			scanner->position = NULL;
			slot--;
			break;
		}

		/* A token marked to be skipped is not kept: its slot is
		 * simply reused for the next token. */
		if (slot->type != CSS_TOKEN_SKIP)
			slot++;
	}

	return end_dom_token_scanning(scanner, slot);
}

112
src/dom/css/scanner.h Normal file
View File

@ -0,0 +1,112 @@
#ifndef EL__DOM_CSS_SCANNER_H
#define EL__DOM_CSS_SCANNER_H
#include "dom/scanner.h"
/* The various token types and what they contain. Patterns taken from
 * the flex scanner declarations in the CSS 2 Specification. */
enum css_token_type {
/* Char tokens: */
/* Char tokens range from 1 to 255 and have their char value as type, */
/* meaning non-char tokens have values from 256 and up. */
/* Low level string tokens: */
/* {...} means char group, <...> means token */
/* {identstart} [a-z_]|{nonascii} */
/* {ident} [a-z0-9_-]|{nonascii} */
/* <ident> {identstart}{ident}* */
/* <name> {ident}+ */
/* <number> [0-9]+|[0-9]*"."[0-9]+ */
/* Percentage is a separate token because, although it looks like it is
 * composed of <number> and '%', floating point numbers are really not
 * allowed in it; strtol() will round it down for us ;) */
CSS_TOKEN_IDENT = 256, /* <ident> */
CSS_TOKEN_NUMBER, /* <number> */
CSS_TOKEN_PERCENTAGE, /* <number>% */
CSS_TOKEN_STRING, /* Char sequence delimited by matching ' or " */
/* High level string tokens: */
/* The various number values; dimension being the most generic */
CSS_TOKEN_ANGLE, /* <number>rad, <number>grad or <number>deg */
CSS_TOKEN_DIMENSION, /* <number><ident> */
CSS_TOKEN_EM, /* <number>em */
CSS_TOKEN_EX, /* <number>ex */
CSS_TOKEN_FREQUENCY, /* <number>Hz or <number>kHz */
CSS_TOKEN_LENGTH, /* <number>{px,cm,mm,in,pt,pc} */
CSS_TOKEN_TIME, /* <number>ms or <number>s */
/* XXX: CSS_TOKEN_HASH conflicts with CSS_TOKEN_HEX_COLOR. Generating
 * hex color tokens has precedence, so users of the hash token have to
 * treat CSS_TOKEN_HASH and CSS_TOKEN_HEX_COLOR alike. */
CSS_TOKEN_HASH, /* #<name> */
CSS_TOKEN_HEX_COLOR, /* #[0-9a-f]\{3,6} */
/* For all unknown functions we generate one token containing both the
 * function name and the args so scanning/parsing is easier. Besides we
 * already check for the ending ')'. */
/* For known functions where we need several args [like rgb()] we want
 * to generate tokens for every arg and arg delimiter ( ',' or ')' ).
 * url() is a bit tricky: because it can contain both <string> and some
 * chars that would otherwise probably make the scanner choke, we also
 * include the arg in that token. Besides it will make things like
 * 'background' property parsing easier. */
CSS_TOKEN_FUNCTION, /* <ident>(<args>) */
CSS_TOKEN_RGB, /* rgb( */
CSS_TOKEN_URL, /* url(<arg>) */
/* @-rule symbols */
CSS_TOKEN_AT_KEYWORD, /* @<ident> */
CSS_TOKEN_AT_CHARSET, /* @charset */
CSS_TOKEN_AT_FONT_FACE, /* @font-face */
CSS_TOKEN_AT_IMPORT, /* @import */
CSS_TOKEN_AT_MEDIA, /* @media */
CSS_TOKEN_AT_PAGE, /* @page */
CSS_TOKEN_IMPORTANT, /* !<whitespace>important */
/* TODO: Selector stuff: */
CSS_TOKEN_SELECT_SPACE_LIST, /* ~= */
CSS_TOKEN_SELECT_HYPHEN_LIST, /* |= */
CSS_TOKEN_SELECT_BEGIN, /* ^= */
CSS_TOKEN_SELECT_END, /* $= */
CSS_TOKEN_SELECT_CONTAINS, /* *= */
/* Special tokens: */
/* A special token for unrecognized strings */
CSS_TOKEN_GARBAGE,
/* Token type used internally when scanning to signal that the token
 * should not be recorded in the scanner's token table. */
CSS_TOKEN_SKIP,
/* Another internal token type used both to mark unused tokens in the
 * scanner table as invalid and, when scanning, to signal that the
 * scanning should end. */
CSS_TOKEN_NONE = 0,
};
extern struct dom_scanner_info dom_css_scanner_info;
/* Skip scanner tokens up to a token of the given @type, passing along the
 * precedence of that type. */
#define skip_css_tokens(scanner, type) \
skip_dom_scanner_tokens(scanner, type, get_css_precedence(type))
/* Precedence of a token type. Only the '}', '{', ';' and ')' char tokens
 * have a precedence, in that descending order; every other type has
 * precedence zero. */
#define get_css_precedence(token_type) \
((token_type) == '}' ? (1 << 10) : \
(token_type) == '{' ? (1 << 9) : \
(token_type) == ';' ? (1 << 8) : \
(token_type) == ')' ? (1 << 7) : 0)
/* Tell whether something of the given @type may be passed over while looking
 * for @skipto. That is the case only when the precedence of @type is
 * strictly lower than the precedence of @skipto. */
static inline int
check_css_precedence(int type, int skipto)
{
	int type_precedence = get_css_precedence(type);
	int skipto_precedence = get_css_precedence(skipto);

	return type_precedence < skipto_precedence;
}
#endif

View File

@ -1,5 +1,5 @@
#ifndef EL__DOCUMENT_DOM_DOM_H
#define EL__DOCUMENT_DOM_DOM_H
#ifndef EL_DOM_DOM_H
#define EL_DOM_DOM_H
enum dom_exception_code {
DOM_ERR_NONE = 0,

View File

@ -9,13 +9,10 @@
#include "elinks.h"
#include "document/dom/node.h"
#include "document/options.h"
#include "intl/charsets.h"
#include "dom/node.h"
#include "dom/string.h"
#include "util/hash.h"
#include "util/lists.h"
#include "util/memory.h"
#include "util/string.h"
static void done_dom_node_data(struct dom_node *node);
@ -94,7 +91,7 @@ del_from_dom_node_list(struct dom_node_list *list, struct dom_node *node)
if (!list) return;
foreach_dom_node(i, entry, list) {
foreach_dom_node (list, entry, i) {
size_t successors;
if (entry != node) continue;
@ -115,7 +112,7 @@ done_dom_node_list(struct dom_node_list *list)
assert(list);
foreach_dom_node (i, node, list) {
foreach_dom_node (list, node, i) {
/* Avoid that the node start messing with the node list. */
done_dom_node_data(node);
}
@ -128,41 +125,33 @@ done_dom_node_list(struct dom_node_list *list)
struct dom_node_search {
struct dom_node *key;
int subtype;
unsigned int from, pos, to;
};
#define INIT_DOM_NODE_SEARCH(key, subtype, list) \
{ (key), (subtype), -1, 0, (list)->size, }
#define INIT_DOM_NODE_SEARCH(key, list) \
{ (key), -1, 0, (list)->size, }
static inline int
dom_node_cmp(struct dom_node_search *search, struct dom_node *node)
int
dom_node_casecmp(struct dom_node *node1, struct dom_node *node2)
{
struct dom_node *key = search->key;
if (search->subtype) {
assert(key->type == node->type);
switch (key->type) {
if (node1->type == node2->type) {
switch (node1->type) {
case DOM_NODE_ELEMENT:
return search->subtype - node->data.element.type;
if (node1->data.element.type && node2->data.element.type)
return node1->data.element.type - node2->data.element.type;
break;
case DOM_NODE_ATTRIBUTE:
return search->subtype - node->data.attribute.type;
if (node1->data.attribute.type && node2->data.attribute.type)
return node1->data.attribute.type - node2->data.attribute.type;
break;
default:
break;
}
}
{
int length = int_min(key->length, node->length);
int string_diff = strncasecmp(key->string, node->string, length);
/* If the lengths or strings don't match strncasecmp() does the
* job else return which ever is bigger. */
return string_diff ? string_diff : key->length - node->length;
}
return dom_string_casecmp(&node1->string, &node2->string);
}
static inline int
@ -184,7 +173,7 @@ dom_node_list_bsearch(struct dom_node_search *search, struct dom_node_list *list
do {
int pos = get_bsearch_position(list, search->from, search->to);
struct dom_node *node = list->entries[pos];
int difference = dom_node_cmp(search, node);
int difference = dom_node_casecmp(search->key, node);
search->pos = pos;
@ -203,7 +192,7 @@ dom_node_list_bsearch(struct dom_node_search *search, struct dom_node_list *list
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node)
{
struct dom_node_search search = INIT_DOM_NODE_SEARCH(node, 0, list);
struct dom_node_search search = INIT_DOM_NODE_SEARCH(node, list);
struct dom_node *match = dom_node_list_bsearch(&search, list);
return match ? search.pos : search.to;
@ -211,20 +200,54 @@ int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node)
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *list, enum dom_node_type type,
uint16_t subtype, unsigned char *name, int namelen)
uint16_t subtype, struct dom_string *name)
{
struct dom_node node = { type, namelen, name };
struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, subtype, list);
struct dom_node node = { type, INIT_DOM_STRING(name->string, name->length) };
struct dom_node_search search = INIT_DOM_NODE_SEARCH(&node, list);
if (subtype) {
/* Set the subtype */
switch (type) {
case DOM_NODE_ELEMENT:
node.data.element.type = subtype;
break;
case DOM_NODE_ATTRIBUTE:
node.data.attribute.type = subtype;
break;
case DOM_NODE_PROCESSING_INSTRUCTION:
node.data.proc_instruction.type = subtype;
break;
default:
break;
}
}
return dom_node_list_bsearch(&search, list);
}
/* Find the position of @node in the node list of @parent that holds nodes of
 * its kind. Returns the index, or -1 when there is no such list or @node is
 * not in it. */
int
get_dom_node_list_index(struct dom_node *parent, struct dom_node *node)
{
	struct dom_node_list **siblings = get_dom_node_list(parent, node);
	struct dom_node *candidate;
	int index;

	if (siblings) {
		foreach_dom_node (*siblings, candidate, index) {
			if (candidate == node)
				return index;
		}
	}

	return -1;
}
/* Nodes */
struct dom_node *
init_dom_node_(unsigned char *file, int line,
struct dom_node *parent, enum dom_node_type type,
unsigned char *string, uint16_t length)
struct dom_string *string)
{
#ifdef DEBUG_MEMLEAK
struct dom_node *node = debug_mem_calloc(file, line, 1, sizeof(*node));
@ -235,18 +258,16 @@ init_dom_node_(unsigned char *file, int line,
if (!node) return NULL;
node->type = type;
node->string = string;
node->length = length;
node->parent = parent;
copy_dom_string(&node->string, string);
if (parent) {
struct dom_node_list **list = get_dom_node_list(parent, node);
int sort = (type == DOM_NODE_ATTRIBUTE);
int index;
assertm(list, "Adding %s to bad parent %s",
get_dom_node_type_name(node->type),
get_dom_node_type_name(parent->type));
assertm(list, "Adding node %d to bad parent %d",
node->type, parent->type);
index = *list && (*list)->size > 0 && sort
? get_dom_node_map_index(*list, node) : -1;
@ -272,7 +293,7 @@ done_dom_node_data(struct dom_node *node)
switch (node->type) {
case DOM_NODE_ATTRIBUTE:
if (data->attribute.allocated)
mem_free(node->string);
done_dom_string(&node->string);
break;
case DOM_NODE_DOCUMENT:
@ -296,7 +317,7 @@ done_dom_node_data(struct dom_node *node)
case DOM_NODE_TEXT:
if (data->text.allocated)
mem_free(node->string);
done_dom_string(&node->string);
break;
case DOM_NODE_PROCESSING_INSTRUCTION:
@ -346,140 +367,94 @@ done_dom_node(struct dom_node *node)
#define set_node_name(name, namelen, str) \
do { (name) = (str); (namelen) = sizeof(str) - 1; } while (0)
unsigned char *
struct dom_string *
get_dom_node_name(struct dom_node *node)
{
unsigned char *name;
uint16_t namelen;
static struct dom_string cdata_section_str = INIT_DOM_STRING("#cdata-section", -1);
static struct dom_string comment_str = INIT_DOM_STRING("#comment", -1);
static struct dom_string document_str = INIT_DOM_STRING("#document", -1);
static struct dom_string document_fragment_str = INIT_DOM_STRING("#document-fragment", -1);
static struct dom_string text_str = INIT_DOM_STRING("#text", -1);
assert(node);
switch (node->type) {
case DOM_NODE_CDATA_SECTION:
set_node_name(name, namelen, "#cdata-section");
break;
case DOM_NODE_CDATA_SECTION:
return &cdata_section_str;
case DOM_NODE_COMMENT:
set_node_name(name, namelen, "#comment");
break;
case DOM_NODE_COMMENT:
return &comment_str;
case DOM_NODE_DOCUMENT:
set_node_name(name, namelen, "#document");
break;
case DOM_NODE_DOCUMENT:
return &document_str;
case DOM_NODE_DOCUMENT_FRAGMENT:
set_node_name(name, namelen, "#document-fragment");
break;
case DOM_NODE_DOCUMENT_FRAGMENT:
return &document_fragment_str;
case DOM_NODE_TEXT:
set_node_name(name, namelen, "#text");
break;
case DOM_NODE_TEXT:
return &text_str;
case DOM_NODE_ATTRIBUTE:
case DOM_NODE_DOCUMENT_TYPE:
case DOM_NODE_ELEMENT:
case DOM_NODE_ENTITY:
case DOM_NODE_ENTITY_REFERENCE:
case DOM_NODE_NOTATION:
case DOM_NODE_PROCESSING_INSTRUCTION:
default:
name = node->string;
namelen = node->length;
case DOM_NODE_ATTRIBUTE:
case DOM_NODE_DOCUMENT_TYPE:
case DOM_NODE_ELEMENT:
case DOM_NODE_ENTITY:
case DOM_NODE_ENTITY_REFERENCE:
case DOM_NODE_NOTATION:
case DOM_NODE_PROCESSING_INSTRUCTION:
default:
return &node->string;
}
return memacpy(name, namelen);
}
static inline unsigned char *
compress_string(unsigned char *string, unsigned int length)
struct dom_string *
get_dom_node_value(struct dom_node *node)
{
struct string buffer;
unsigned char escape[2] = "\\";
if (!init_string(&buffer)) return NULL;
for (; length > 0; string++, length--) {
unsigned char *bytes = string;
if (*string == '\n' || *string == '\r' || *string == '\t') {
bytes = escape;
escape[1] = *string == '\n' ? 'n'
: (*string == '\r' ? 'r' : 't');
}
add_bytes_to_string(&buffer, bytes, bytes == escape ? 2 : 1);
}
return buffer.source;
}
unsigned char *
get_dom_node_value(struct dom_node *node, int codepage)
{
unsigned char *value;
uint16_t valuelen;
assert(node);
switch (node->type) {
case DOM_NODE_ATTRIBUTE:
value = node->data.attribute.value;
valuelen = node->data.attribute.valuelen;
break;
case DOM_NODE_ATTRIBUTE:
return &node->data.attribute.value;
case DOM_NODE_PROCESSING_INSTRUCTION:
value = node->data.proc_instruction.instruction;
valuelen = node->data.proc_instruction.instructionlen;
break;
case DOM_NODE_PROCESSING_INSTRUCTION:
return &node->data.proc_instruction.instruction;
case DOM_NODE_CDATA_SECTION:
case DOM_NODE_COMMENT:
case DOM_NODE_TEXT:
value = node->string;
valuelen = node->length;
break;
case DOM_NODE_CDATA_SECTION:
case DOM_NODE_COMMENT:
case DOM_NODE_TEXT:
return &node->string;
case DOM_NODE_ENTITY_REFERENCE:
value = get_entity_string(node->string, node->length,
codepage);
valuelen = value ? strlen(value) : 0;
break;
case DOM_NODE_NOTATION:
case DOM_NODE_DOCUMENT:
case DOM_NODE_DOCUMENT_FRAGMENT:
case DOM_NODE_DOCUMENT_TYPE:
case DOM_NODE_ELEMENT:
case DOM_NODE_ENTITY:
default:
return NULL;
case DOM_NODE_ENTITY_REFERENCE:
case DOM_NODE_NOTATION:
case DOM_NODE_DOCUMENT:
case DOM_NODE_DOCUMENT_FRAGMENT:
case DOM_NODE_DOCUMENT_TYPE:
case DOM_NODE_ELEMENT:
case DOM_NODE_ENTITY:
default:
return NULL;
}
if (!value) value = "";
return compress_string(value, valuelen);
}
unsigned char *
struct dom_string *
get_dom_node_type_name(enum dom_node_type type)
{
static unsigned char *dom_node_type_names[DOM_NODES] = {
NULL,
/* DOM_NODE_ELEMENT */ "element",
/* DOM_NODE_ATTRIBUTE */ "attribute",
/* DOM_NODE_TEXT */ "text",
/* DOM_NODE_CDATA_SECTION */ "cdata-section",
/* DOM_NODE_ENTITY_REFERENCE */ "entity-reference",
/* DOM_NODE_ENTITY */ "entity",
/* DOM_NODE_PROCESSING_INSTRUCTION */ "proc-instruction",
/* DOM_NODE_COMMENT */ "comment",
/* DOM_NODE_DOCUMENT */ "document",
/* DOM_NODE_DOCUMENT_TYPE */ "document-type",
/* DOM_NODE_DOCUMENT_FRAGMENT */ "document-fragment",
/* DOM_NODE_NOTATION */ "notation",
static struct dom_string dom_node_type_names[DOM_NODES] = {
INIT_DOM_STRING(NULL, 0),
/* DOM_NODE_ELEMENT */ INIT_DOM_STRING("element", -1),
/* DOM_NODE_ATTRIBUTE */ INIT_DOM_STRING("attribute", -1),
/* DOM_NODE_TEXT */ INIT_DOM_STRING("text", -1),
/* DOM_NODE_CDATA_SECTION */ INIT_DOM_STRING("cdata-section", -1),
/* DOM_NODE_ENTITY_REFERENCE */ INIT_DOM_STRING("entity-reference", -1),
/* DOM_NODE_ENTITY */ INIT_DOM_STRING("entity", -1),
/* DOM_NODE_PROCESSING_INSTRUCTION */ INIT_DOM_STRING("proc-instruction", -1),
/* DOM_NODE_COMMENT */ INIT_DOM_STRING("comment", -1),
/* DOM_NODE_DOCUMENT */ INIT_DOM_STRING("document", -1),
/* DOM_NODE_DOCUMENT_TYPE */ INIT_DOM_STRING("document-type", -1),
/* DOM_NODE_DOCUMENT_FRAGMENT */ INIT_DOM_STRING("document-fragment", -1),
/* DOM_NODE_NOTATION */ INIT_DOM_STRING("notation", -1),
};
assert(type < DOM_NODES);
return dom_node_type_names[type];
return &dom_node_type_names[type];
}

View File

@ -1,12 +1,15 @@
#ifndef EL__DOCUMENT_DOM_NODE_H
#define EL__DOCUMENT_DOM_NODE_H
#ifndef EL_DOM_NODE_H
#define EL_DOM_NODE_H
#include "dom/string.h"
#include "util/hash.h"
struct dom_node_list;
enum dom_node_type {
DOM_NODE_UNKNOWN = 0, /* for internal purpose only */
DOM_NODE_ELEMENT = 1,
DOM_NODE_ATTRIBUTE = 2,
DOM_NODE_TEXT = 3,
@ -30,11 +33,6 @@ struct dom_node_id_item {
/* The attribute node containing the id value */
struct dom_node *id_attribute;
/* The path to the node. This can be passed to the DOM navigator to
* locate the node. The path should not contain the id node itself.
* E.g. for <a id="..."> element "html/body/p/span". */
unsigned char *path;
/* The node with the @id attribute */
struct dom_node *node;
};
@ -54,15 +52,12 @@ struct dom_document_node {
};
struct dom_id {
unsigned char *public_id;
uint16_t pid_length;
uint16_t sid_length;
unsigned char *system_id;
struct dom_string public_id;
struct dom_string system_id;
};
struct dom_doctype_subset_info {
uint16_t internallen;
unsigned char *internal;
struct dom_string internal;
struct dom_id external;
};
@ -106,8 +101,7 @@ struct dom_attribute_node {
/* The string that hold the attribute value. The @string / @length
* members of {struct dom_node} holds the name that identifies the node
* in the map. */
unsigned char *value;
uint16_t valuelen;
struct dom_string value;
/* For xml:lang="en" attributes this holds the offset of 'lang' */
uint16_t namespace_offset;
@ -168,8 +162,7 @@ struct dom_proc_instruction_node {
/* The target of the processing instruction (xml for '<?xml ... ?>')
* is in the @string / @length members. */
/* This holds the value to be processed */
unsigned char *instruction;
uint16_t instructionlen;
struct dom_string instruction;
/* For fast checking of the target type */
uint16_t type; /* enum dom_proc_instruction_type */
@ -207,8 +200,7 @@ struct dom_node {
/* Can contain either stuff like element name or for attributes the
* attribute name. */
uint16_t length;
unsigned char *string;
struct dom_string string;
struct dom_node *parent;
@ -222,11 +214,11 @@ struct dom_node_list {
struct dom_node *entries[1];
};
#define foreach_dom_node(i, node, list) \
#define foreach_dom_node(list, node, i) \
for ((i) = 0; (i) < (list)->size; (i)++) \
if (((node) = (list)->entries[(i)]))
#define foreachback_dom_node(i, node, list) \
#define foreachback_dom_node(list, node, i) \
for ((i) = (list)->size - 1; (i) > 0; (i)--) \
if (((node) = (list)->entries[(i)]))
@ -241,6 +233,11 @@ add_to_dom_node_list(struct dom_node_list **list_ptr,
void done_dom_node_list(struct dom_node_list *list);
/* Returns the position or index where the @node has been inserted into the
 * 'default' list of the @parent node. (Default means use get_dom_node_list()
 * to acquire the list to search in.) Returns -1, if the node is not found. */
int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
/* Returns the position or index where the @node should be inserted into the
 * node @list in order for the list to stay alphabetically sorted. Assumes
 * that @list is already sorted properly. */
@ -253,41 +250,39 @@ int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *node_map,
enum dom_node_type type, uint16_t subtype,
unsigned char *name, int namelen);
struct dom_string *name);
struct dom_node *
init_dom_node_(unsigned char *file, int line,
struct dom_node *parent, enum dom_node_type type,
unsigned char *string, uint16_t length);
#define init_dom_node(type, string, length) init_dom_node_(__FILE__, __LINE__, NULL, type, string, length)
#define add_dom_node(parent, type, string, length) init_dom_node_(__FILE__, __LINE__, parent, type, string, length)
struct dom_string *string);
#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string)
#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string)
#define add_dom_element(parent, string, length) \
add_dom_node(parent, DOM_NODE_ELEMENT, string, length)
#define add_dom_element(parent, string) \
add_dom_node(parent, DOM_NODE_ELEMENT, string)
static inline struct dom_node *
add_dom_attribute(struct dom_node *parent, unsigned char *string, int length,
unsigned char *value, uint16_t valuelen)
add_dom_attribute(struct dom_node *parent, struct dom_string *name,
struct dom_string *value)
{
struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, string, length);
struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
if (node && value) {
node->data.attribute.value = value;
node->data.attribute.valuelen = valuelen;
copy_dom_string(&node->data.attribute.value, value);
}
return node;
}
static inline struct dom_node *
add_dom_proc_instruction(struct dom_node *parent, unsigned char *string, int length,
unsigned char *instruction, uint16_t instructionlen)
add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
struct dom_string *instruction)
{
struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string, length);
struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
if (node && instruction) {
node->data.proc_instruction.instruction = instruction;
node->data.proc_instruction.instructionlen = instructionlen;
copy_dom_string(&node->data.proc_instruction.instruction, instruction);
}
return node;
@ -296,21 +291,21 @@ add_dom_proc_instruction(struct dom_node *parent, unsigned char *string, int len
/* Removes the node and all its children and free()s itself */
void done_dom_node(struct dom_node *node);
/* Returns the name of the node in an allocated string. */
unsigned char *get_dom_node_name(struct dom_node *node);
/* Compare two nodes returning non-zero if they differ. */
int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
/* Returns the value of the node in an allocated string.
* @codepage denotes how entity strings should be decoded. */
unsigned char *get_dom_node_value(struct dom_node *node, int codepage);
/* Returns the name of the node in an allocated string. */
struct dom_string *get_dom_node_name(struct dom_node *node);
/* Returns the value of the node or NULL if no value is defined for the node
* type. */
struct dom_string *get_dom_node_value(struct dom_node *node);
/* Returns the name used for identifying the node type. */
unsigned char *get_dom_node_type_name(enum dom_node_type type);
/* Returns a pointer to a node list containing attributes. */
#define get_dom_node_attributes(node) \
((node)->type == DOM_NODE_ELEMENT ? &(node)->data.element.map \
: NULL)
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
/* Based on the type of the parent and the node return a proper list
* or NULL. This is useful when adding a node to a parent node. */
static inline struct dom_node_list **
get_dom_node_list(struct dom_node *parent, struct dom_node *node)
{

175
src/dom/scanner.c Normal file
View File

@ -0,0 +1,175 @@
/* A pretty generic scanner */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "dom/scanner.h"
#include "dom/string.h"
#include "util/error.h"
int
map_dom_scanner_string(struct dom_scanner *scanner,
unsigned char *ident, unsigned char *end, int base_type)
{
const struct dom_scanner_string_mapping *mappings = scanner->info->mappings;
struct dom_string name = INIT_DOM_STRING(ident, end - ident);
for (; is_dom_string_set(&mappings->name); mappings++) {
if (mappings->base_type == base_type
&& !dom_string_casecmp(&mappings->name, &name))
return mappings->type;
}
return base_type;
}
/* Consumes tokens until one of type @skipto is found and returns the token
 * following it. Returns NULL if the tokens run out or if a token that is not
 * of type @skipto has a precedence higher than @precedence. */
struct dom_scanner_token *
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence)
{
	struct dom_scanner_token *cur;

	/* Skip tokens while handling some basic precedence of special chars
	 * so we don't skip too far. */
	for (cur = get_dom_scanner_token(scanner);
	     cur != NULL;
	     cur = get_next_dom_scanner_token(scanner)) {
		/* Reaching the wanted token wins over the precedence check. */
		if (cur->type == skipto)
			return get_next_dom_scanner_token(scanner);

		if (cur->precedence > precedence)
			return NULL;
	}

	return NULL;
}
#ifdef DEBUG_SCANNER
/* Debug helper: formats up to four of the pending tokens followed by up to
 * 50 bytes of the source text starting at the current token, and reports the
 * result through elinks_wdebug(). Newlines, carriage returns and tabs in the
 * source excerpt are shown as escape sequences. Does nothing when the
 * scanner has no tokens. */
void
dump_dom_scanner(struct dom_scanner *scanner)
{
	unsigned char buffer[MAX_STR_LEN];
	struct dom_scanner_token *token = scanner->current;
	struct dom_scanner_token *table_end = scanner->table + scanner->tokens;
	unsigned char *bufpos = buffer;
	/* Always keep room for the closing "...]" and the terminating NUL. */
	unsigned char *bufend = buffer + MAX_STR_LEN - 5;
	unsigned char *srcpos;
	int src_lookahead = 50;
	int token_lookahead = 4;
	int srclen;
	int truncated;

	if (!dom_scanner_has_tokens(scanner)) return;

	/* Tokens carry a {struct dom_string}; the text lives in its members. */
	srcpos = token->string.string;

	memset(buffer, 0, MAX_STR_LEN);
	for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
		int buflen = bufend - bufpos;
		int added;

		if (buflen <= 1) break;

		added = snprintf((char *) bufpos, buflen, "[%.*s] ",
				 (int) token->string.length,
				 (char *) token->string.string);
		if (added < 0) break;

		/* snprintf() returns the untruncated length; never advance
		 * past what was really written. */
		if (added >= buflen) added = buflen - 1;
		bufpos += added;
	}

	/* Tell if there are tokens that were not shown. */
	if (token < table_end && bufend - bufpos >= 4) {
		memcpy(bufpos, "... ", 4);
		bufpos += 4;
	}

	/* The scanned string is delimited by scanner->end and is not required
	 * to be NUL terminated, so don't use strlen() on it. */
	srclen = (srcpos && srcpos < scanner->end) ? scanner->end - srcpos : 0;
	truncated = srclen > src_lookahead;
	int_upper_bound(&src_lookahead, srclen);

	if (bufpos < bufend)
		*bufpos++ = '[';

	/* Compress the lookahead string. An escaped char needs two bytes. */
	for (; src_lookahead > 0 && bufend - bufpos >= 2;
	     src_lookahead--, srcpos++, bufpos++) {
		if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') {
			*bufpos++ = '\\';
			*bufpos = *srcpos == '\n' ? 'n'
				: (*srcpos == '\r' ? 'r' : 't');
		} else {
			*bufpos = *srcpos;
		}
	}

	/* Running out of buffer space also cuts the excerpt short. */
	if (src_lookahead > 0)
		truncated = 1;

	if (truncated)
		memcpy(bufpos, "...]", 5);
	else
		memcpy(bufpos, "]", 2);

	errfile = scanner->file, errline = scanner->line;
	elinks_wdebug("%s", buffer);
}
/* Debugging variant of get_dom_scanner_token(): dumps the scanner state
 * before handing out the current token. Returns NULL without dumping
 * anything when the scanner has no tokens. */
struct dom_scanner_token *
get_dom_scanner_token_debug(struct dom_scanner *scanner)
{
	if (dom_scanner_has_tokens(scanner)) {
		dump_dom_scanner(scanner);

		/* Make sure we do not return invalid tokens */
		assert(!dom_scanner_has_tokens(scanner)
		       || scanner->current->type != 0);

		return get_dom_scanner_token(scanner);
	}

	return NULL;
}
#endif
/* Initializers */
/* Fills in the scan table of @scanner_info from the entries listed in its
 * scan_table_info member. Does nothing if no table info has been set. Every
 * entry ORs its bits into the table slots it covers, so several entries can
 * contribute to the same character. */
static inline void
init_dom_scanner_info(struct dom_scanner_info *scanner_info)
{
const struct dom_scan_table_info *info = scanner_info->scan_table_info;
int *scan_table = scanner_info->scan_table;
int i;
if (!info) return;
/* The list of entries is terminated by a DOM_SCAN_END entry. */
for (i = 0; info[i].type != DOM_SCAN_END; i++) {
const struct dom_string *data = &info[i].data;
if (info[i].type == DOM_SCAN_RANGE) {
/* For ranges the first char of the string is the lower bound
 * and the length member holds the inclusive upper bound. */
int index = *data->string;
assert(index > 0);
assert(data->length < DOM_SCAN_TABLE_SIZE);
assert(index <= data->length);
for (; index <= data->length; index++)
scan_table[index] |= info[i].bits;
} else {
/* For strings every char of the string gets the bits set. */
unsigned char *string = info[i].data.string;
int pos = info[i].data.length - 1;
assert(info[i].type == DOM_SCAN_STRING && pos >= 0);
for (; pos >= 0; pos--)
scan_table[string[pos]] |= info[i].bits;
}
}
}
/* Prepares @scanner for scanning @string using the rules in @scanner_info.
 * The scan table of @scanner_info is set up the first time it is used. The
 * scanner is reset, pointed at the string and the scan callback is run once
 * so the token table is filled before returning. When @count_lines is
 * non-zero line counting is enabled and the line number starts at one. */
void
init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
		 struct dom_string *string, int state, int count_lines)
{
	unsigned char *start;

	/* Lazily build the shared scan table. */
	if (scanner_info->initialized == 0) {
		init_dom_scanner_info(scanner_info);
		scanner_info->initialized = 1;
	}

	memset(scanner, 0, sizeof(*scanner));

	start = string->string;
	scanner->string = start;
	scanner->position = start;
	scanner->end = start + string->length;

	scanner->current = scanner->table;
	scanner->info = scanner_info;
	scanner->state = state;

	scanner->count_lines = count_lines ? 1 : 0;
	scanner->lineno = scanner->count_lines;

	/* Scan the first batch of tokens. */
	scanner_info->scan(scanner);
}

251
src/dom/scanner.h Normal file
View File

@ -0,0 +1,251 @@
#ifndef EL_DOM_SCANNER_H
#define EL_DOM_SCANNER_H
#include "dom/string.h"
#include "util/error.h"
/* Define if you want a talking scanner */
/* #define DEBUG_DOM_SCANNER */
/* The {struct dom_scanner_token} describes one scanner state. There are two
 * kinds of tokens: char and non-char tokens. Char tokens contain only one
 * char and simply have their char value as type. They are tokens having
 * special control meaning in the code, like ':', ';', '{', '}' and '*'. Non
 * char tokens have one or more chars and contain stuff like number or
 * identifier strings. */
struct dom_scanner_token {
/* The type of the token */
int type;
/* Some precedence value */
int precedence;
/* The start of the token string and the token length */
struct dom_string string;
};
#define skip_dom_scanner_token_char(token) \
do { (token)->string.string++; (token)->string.length--; } while (0)
/* Compare the string of @token with the "static" string in @str. */
#define dom_scanner_token_contains(token, str) \
((token)->string.length == (sizeof(str) - 1) \
&& !strncasecmp((token)->string.string, str, sizeof(str) - 1))
/* One entry describing how to initialize a part of the scan table. Use the
 * DOM_SCAN_TABLE_* macros below to create entries. */
struct dom_scan_table_info {
/* The kind of entry; DOM_SCAN_END terminates a list of entries. */
enum { DOM_SCAN_RANGE, DOM_SCAN_STRING, DOM_SCAN_END } type;
/* DOM_SCAN_STRING: the chars which should get the bits set.
 * DOM_SCAN_RANGE: the first char of the string is the start of the range
 * and the length member holds the (inclusive) end of the range. */
struct dom_string data;
/* The bits to OR into the scan table slots covered by the entry. */
int bits;
};
#define DOM_SCAN_TABLE_SIZE 256
#define DOM_SCAN_TABLE_INFO(type, data1, data2, bits) \
{ (type), INIT_DOM_STRING((data1), (data2)), (bits) }
#define DOM_SCAN_TABLE_RANGE(from, to, bits) \
DOM_SCAN_TABLE_INFO(DOM_SCAN_RANGE, from, to, bits)
#define DOM_SCAN_TABLE_STRING(str, bits) \
DOM_SCAN_TABLE_INFO(DOM_SCAN_STRING, str, sizeof(str) - 1, bits)
#define DOM_SCAN_TABLE_END \
DOM_SCAN_TABLE_INFO(DOM_SCAN_END, NULL, 0, 0)
/* Maps a string to a token type. Tables of mappings are searched by
 * map_dom_scanner_string() and must end with DOM_STRING_MAP_END. */
struct dom_scanner_string_mapping {
/* The string to match; it is compared case-insensitively. An unset
 * name marks the end of the mapping table. */
struct dom_string name;
/* The token type to use when the string matches. */
int type;
/* The mapping is only considered when this equals the base type that
 * is passed to the lookup. */
int base_type;
};
#define DOM_STRING_MAP(str, type, family) \
{ INIT_DOM_STRING(str, -1), (type), (family) }
#define DOM_STRING_MAP_END \
{ INIT_DOM_STRING(NULL, 0), 0, 0 }
struct dom_scanner;
/* The 'meta' information describing one kind of scanner. It holds the tables
 * and the scan callback that instances of {struct dom_scanner} refer to. */
struct dom_scanner_info {
/* Table containing how to map strings to token types */
const struct dom_scanner_string_mapping *mappings;
/* Information for how to initialize the scanner table */
const struct dom_scan_table_info *scan_table_info;
/* Fills the scanner with tokens. Already scanned tokens which have not
 * been requested remain and are moved to the start of the scanner's
 * token table. */
/* Returns the current token or NULL if there are none. */
struct dom_scanner_token *(*scan)(struct dom_scanner *scanner);
/* The scanner table */
/* Contains bitmaps for the various character groups.
 * Idea sync'ed from mozilla browser. */
int scan_table[DOM_SCAN_TABLE_SIZE];
/* Has the scanner info been initialized? This is set by
 * init_dom_scanner() once the scan table has been filled in. */
unsigned int initialized:1;
};
/* Initializes the scanner. */
void init_dom_scanner(struct dom_scanner *scanner, struct dom_scanner_info *scanner_info,
struct dom_string *string, int state, int count_lines);
/* The number of tokens in the scanners token table:
* At best it should be big enough to contain properties with space separated
* values and function calls with up to 3 variables like rgb(). At worst it
* should be no less than 2 in order to be able to peek at the next token in
* the scanner. */
#define DOM_SCANNER_TOKENS 10
/* The {struct dom_scanner} describes the current state of the scanner. */
struct dom_scanner {
/* The very start of the scanned string, the position in the string
 * where to scan next and the end of the string. If position is NULL it
 * means that no more tokens can be retrieved from the string. */
unsigned char *string, *position, *end;
/* The current token and number of scanned tokens in the table.
 * If the number of scanned tokens is less than DOM_SCANNER_TOKENS it
 * is because there are no more tokens in the string. */
struct dom_scanner_token *current;
int tokens;
/* The 'meta' scanner information */
struct dom_scanner_info *info;
/* NOTE(review): this is guarded by DEBUG_SCANNER while the comment at the
 * top of this header and the dump_dom_scanner() prototype use
 * DEBUG_DOM_SCANNER -- confirm which name is intended. */
#ifdef DEBUG_SCANNER
/* Debug info about the caller. */
unsigned char *file;
int line;
#endif
/* Whether line counting was requested and the line number. When
 * initializing the scanner the line number is set to 1 if line counting
 * is enabled and 0 otherwise. */
unsigned int count_lines:1;
unsigned int lineno;
/* Some state indicator only meaningful to the scanner internals */
int state;
/* The table contains already scanned tokens. It is maintained in
 * order to optimize the scanning a bit and make it possible to look
 * ahead at the next token. You should always use the accessors
 * (defined below) for getting tokens from the scanner. */
struct dom_scanner_token table[DOM_SCANNER_TOKENS];
};
#define dom_scanner_has_tokens(scanner) \
((scanner)->tokens > 0 && (scanner)->current < (scanner)->table + (scanner)->tokens)
/* This macro checks if the current scanner state is valid. Meaning if the
 * scanners table is full the last token skipping or
 * get_next_dom_scanner_token() call made it possible to get the type of the
 * next token. */
/* The argument is parenthesized so any pointer expression can be passed. */
#define check_dom_scanner(scanner) \
	((scanner)->tokens < DOM_SCANNER_TOKENS \
	 || (scanner)->current + 1 < (scanner)->table + (scanner)->tokens)
/* Scanner table accessors and mutators */
/* Checks the type of the next token. Evaluates to false if the scanner has
 * no tokens or if there is no token after the current one in the table. */
#define check_next_dom_scanner_token(scanner, token_type) \
	(dom_scanner_has_tokens(scanner) \
	 && ((scanner)->current + 1 < (scanner)->table + (scanner)->tokens) \
	 && (scanner)->current[1].type == (token_type))
/* Access current and next token. Getting the next token might cause
* a rescan so any token pointers that has been stored in a local variable
* might not be valid after the call. */
/* Gives access to the current token without advancing the scanner. Returns
 * NULL when the scanner has no tokens. */
static inline struct dom_scanner_token *
get_dom_scanner_token(struct dom_scanner *scanner)
{
	if (!dom_scanner_has_tokens(scanner))
		return NULL;

	return scanner->current;
}
/* Advances to the next token and returns it. A rescan is done if, after
 * advancing, we do not also have access to the token following it. */
/* If the scanner has no tokens nothing is advanced and the result is that of
 * get_dom_scanner_token(), which is NULL. Note that the advancing happens as
 * a side effect inside the condition and only when the scanner has tokens. */
static inline struct dom_scanner_token *
get_next_dom_scanner_token(struct dom_scanner *scanner)
{
return (dom_scanner_has_tokens(scanner)
&& (++scanner->current + 1 >= scanner->table + scanner->tokens)
? scanner->info->scan(scanner) : get_dom_scanner_token(scanner));
}
/* This should just make the code more understandable .. hopefully */
#define skip_dom_scanner_token(scanner) get_next_dom_scanner_token(scanner)
/* Removes tokens from the scanner until it meets a token of the given type.
* This token will then also be skipped. */
struct dom_scanner_token *
skip_dom_scanner_tokens(struct dom_scanner *scanner, int skipto, int precedence);
/* Looks up the string from @ident to @end to in the scanners string mapping
* table */
int
map_dom_scanner_string(struct dom_scanner *scanner,
unsigned char *ident, unsigned char *end, int base_type);
#ifdef DEBUG_DOM_SCANNER
void dump_dom_scanner(struct dom_scanner *scanner);
#endif
/* The begin_dom_token_scanning() and end_dom_token_scanning() functions
 * provide the basic setup and teardown for the rescan function made public
 * via the scanner_info->scan member. */
/* Prepares the token table for a rescan: tokens from the current one up to
 * the end of the filled part of the table are moved to the front and the
 * rest of the table is cleared. */
/* Returns NULL if it is not necessary to try to scan for more tokens */
static inline struct dom_scanner_token *
begin_dom_token_scanning(struct dom_scanner *scanner)
{
struct dom_scanner_token *table = scanner->table;
struct dom_scanner_token *table_end = table + scanner->tokens;
/* The number of tokens that have not been consumed yet. */
int move_to_front = int_max(table_end - scanner->current, 0);
struct dom_scanner_token *current = move_to_front ? scanner->current : table;
size_t moved_size = 0;
assert(scanner->current);
/* Move any untouched tokens */
if (move_to_front) {
moved_size = move_to_front * sizeof(*table);
memmove(table, current, moved_size);
/* Continue after the tokens that were kept. */
current = &table[move_to_front];
}
/* Clear all unused tokens */
memset(current, 0, sizeof(*table) * DOM_SCANNER_TOKENS - moved_size);
/* A NULL position means the string has been scanned to the end. The
 * token count is then the number of kept tokens, or -1 if none is left. */
if (!scanner->position) {
scanner->tokens = move_to_front ? move_to_front : -1;
scanner->current = table;
assert(check_dom_scanner(scanner));
return NULL;
}
/* Only the kept tokens count as scanned for now. */
/* NOTE(review): scanner->current is not reset on this path; presumably the
 * scan callback finishes with end_dom_token_scanning() which does it --
 * confirm against the scanner implementations. */
scanner->tokens = move_to_front;
return table;
}
/* Updates the @scanner struct after scanning has been done. The position
 * _after_ the last valid token is taken as the @end argument. Returns the
 * current token or NULL if the table ended up with no tokens. */
/* It is ok for @end to be < scanner->table since scanner->tokens will become
 * <= 0 anyway. */
static inline struct dom_scanner_token *
end_dom_token_scanning(struct dom_scanner *scanner, struct dom_scanner_token *end)
{
assert(end <= scanner->table + DOM_SCANNER_TOKENS);
/* The token count is the distance from the start of the table to @end. */
scanner->tokens = (end - scanner->table);
scanner->current = scanner->table;
/* Mark the string as exhausted once the end of it has been reached. */
if (scanner->position >= scanner->end)
scanner->position = NULL;
assert(check_dom_scanner(scanner));
return get_dom_scanner_token(scanner);
}
#endif

1082
src/dom/select.c Normal file

File diff suppressed because it is too large Load Diff

357
src/dom/select.h Normal file
View File

@ -0,0 +1,357 @@
#ifndef EL_DOM_SELECT_H
#define EL_DOM_SELECT_H
#include "dom/node.h"
/* FIXME: Namespaces; *|E */
enum dom_select_element_match {
/* Gives info about the relation required between two element nodes for
 * them to match. This is also referred to as combinators. */
/* The following are mutually exclusive and at least one must be set.
 * DOM_SELECT_RELATION_DESCENDANT is the default. */
/* Matches any F descendant of E: E F */
/* Bogus flag; it is an easy way to have a default. */
DOM_SELECT_RELATION_DESCENDANT = 0,
/* Matches F being a direct child of E: E > F */
DOM_SELECT_RELATION_DIRECT_CHILD = 1,
/* Matches F immediately preceded by E: E + F */
DOM_SELECT_RELATION_DIRECT_ADJACENT = 2,
/* Matches F preceded by E: E ~ F */
DOM_SELECT_RELATION_INDIRECT_ADJACENT = 4,
/* Mask covering all the relation bits. The descendant flag is zero so
 * it does not contribute any bit to the mask. */
DOM_SELECT_RELATION_FLAGS = DOM_SELECT_RELATION_DESCENDANT
| DOM_SELECT_RELATION_DIRECT_CHILD
| DOM_SELECT_RELATION_DIRECT_ADJACENT
| DOM_SELECT_RELATION_INDIRECT_ADJACENT,
/* None of the following are mutually exclusive. They can co-exist
 * although combining them might not make a lot of sense. */
/* Matches any element: * */
DOM_SELECT_ELEMENT_UNIVERSAL = 8,
/* Matches the root node of the document: :root or // */
DOM_SELECT_ELEMENT_ROOT = 16,
/* Matches the empty element (not even text): :empty */
DOM_SELECT_ELEMENT_EMPTY = 32,
/* Matches some n-th child of its parent: :nth-child(n), etc. */
DOM_SELECT_ELEMENT_NTH_CHILD = 64,
/* Matches some n-th sibling of its type: :nth-of-type(n), etc. */
DOM_SELECT_ELEMENT_NTH_TYPE = 128,
};
/* The special CSS .bar class attribute syntax is represented as
 * E[class="bar"]. The ID flag will match against any attribute with its
 * boolean id member set. XXX: These flags are ATM mutually exclusive. */
enum dom_select_attribute_match {
/* Matches any set value: E[foo] */
DOM_SELECT_ATTRIBUTE_ANY = 1,
/* Matches exact value "bar": E[foo="bar"] */
DOM_SELECT_ATTRIBUTE_EXACT = 2,
/* Matches space separated list "z bar bee": E[foo~="bar"] */
DOM_SELECT_ATTRIBUTE_SPACE_LIST = 4,
/* Matches hyphen separated list "z-bar-bee": E[foo|="bar"] */
DOM_SELECT_ATTRIBUTE_HYPHEN_LIST = 8,
/* Matches value beginning; "bar-z-bee": E[foo^="bar"] */
DOM_SELECT_ATTRIBUTE_BEGIN = 16,
/* Matches value ending; "z-bee-bar": E[foo$="bar"] */
DOM_SELECT_ATTRIBUTE_END = 32,
/* Matches value containing; "m33p/bar\++": E[foo*="bar"] */
DOM_SELECT_ATTRIBUTE_CONTAINS = 64,
/* Matches exact ID attribute value "bar": #bar */
DOM_SELECT_ATTRIBUTE_ID = 128,
};
/* Info about text matching is stored in a DOM text node. */
enum dom_select_text_match {
/* Matches E containing substring "foo": E:contains("foo") */
DOM_SELECT_TEXT_CONTAINS = 1,
};
/* Info about what nth child or type to match. The basic syntax is:
 *
 * <step>n<index>
 *
 * with a little syntactic sugar.
 *
 * Examples:
 *
 * 0n+1 / 1 is first child (same as :first-child)
 * 2n+0 / 2n / even is all even children
 * 2n+1 / odd is all odd children
 * -0n+2 is the last two children
 * -0n+1 / -1 is last child (same as :last-child)
 * 1n+0 / n+0 / n is all elements of type
 * 0n+0 is only element of type (a special internal syntax
 * used when storing nth-info)
 *
 * That is, a zero step (0n) means exact indexing, and non-zero step
 * means stepwise indexing.
 */
/* NOTE(review): both members are unsigned (size_t) while the examples above
 * use negative values to count from the end; how the sign is stored is not
 * visible in this header -- confirm against the selector parser. */
struct dom_select_nth_match {
size_t step;
size_t index;
};
#define set_dom_select_nth_match(nth, nthstep, nthindex) \
do { (nth)->step = (nthstep); (nth)->index = (nthindex); } while(0)
/* This is supposed to be a simple selector. However, this struct is also used
* for holding data for attribute matching and element text matching. */
struct dom_select_node {
/* This holds the DOM node which has data about the node being matched.
* It can be either an element, attribute, or a text node. */
/* XXX: Keep at the top. This is used for translating dom_node
* reference to dom_select_node. */
struct dom_node node;
/* Only meaningful for element nodes. */
/* FIXME: Don't waste memory for non-element nodes. */
struct dom_select_nth_match nth_child;
struct dom_select_nth_match nth_type;
/* Flags, specifying how the matching should be done. */
union {
enum dom_select_element_match element;
enum dom_select_attribute_match attribute;
enum dom_select_text_match text;
} match;
};
/* The pseudo-elements and pseudo-classes known to the selector code. */
enum dom_select_pseudo {
DOM_SELECT_PSEUDO_UNKNOWN = 0,
/* Pseudo-elements: */
/* Matches first formatted line: ::first-line */
DOM_SELECT_PSEUDO_FIRST_LINE = 1,
/* Matches first formatted letter: ::first-letter */
DOM_SELECT_PSEUDO_FIRST_LETTER = 2,
/* Matches text selected by user: ::selection */
DOM_SELECT_PSEUDO_SELECTION = 4,
/* Matches generated content after an element: ::after */
DOM_SELECT_PSEUDO_AFTER = 8,
/* Matches generated content before an element: ::before */
DOM_SELECT_PSEUDO_BEFORE = 16,
/* Pseudo-attributes: */
/* Link pseudo-classes: */
DOM_SELECT_PSEUDO_LINK = 32, /* :link */
DOM_SELECT_PSEUDO_VISITED = 64, /* :visited */
/* User action pseudo-classes: */
DOM_SELECT_PSEUDO_ACTIVE = 128, /* :active */
DOM_SELECT_PSEUDO_HOVER = 256, /* :hover */
DOM_SELECT_PSEUDO_FOCUS = 512, /* :focus */
/* Target pseudo-class: */
DOM_SELECT_PSEUDO_TARGET = 1024, /* :target */
/* UI element states pseudo-classes: */
DOM_SELECT_PSEUDO_ENABLED = 2048, /* :enabled */
DOM_SELECT_PSEUDO_DISABLED = 4096, /* :disabled */
DOM_SELECT_PSEUDO_CHECKED = 8192, /* :checked */
DOM_SELECT_PSEUDO_INDETERMINATE = 16384, /* :indeterminate */
/* XXX: The following pseudo-classes are not kept in the pseudo member
 * of the dom_select struct so they should not be bitfields. They are
 * mostly for parsing purposes. */
/* They are numbered sequentially starting from 10000, so they must not be
 * tested or combined with bit operations like the flags above. */
DOM_SELECT_PSEUDO_CONTAINS = 10000,
DOM_SELECT_PSEUDO_NTH_CHILD,
DOM_SELECT_PSEUDO_NTH_LAST_CHILD,
DOM_SELECT_PSEUDO_FIRST_CHILD,
DOM_SELECT_PSEUDO_LAST_CHILD,
DOM_SELECT_PSEUDO_ONLY_CHILD,
DOM_SELECT_PSEUDO_NTH_TYPE,
DOM_SELECT_PSEUDO_NTH_LAST_TYPE,
DOM_SELECT_PSEUDO_FIRST_TYPE,
DOM_SELECT_PSEUDO_LAST_TYPE,
DOM_SELECT_PSEUDO_ONLY_TYPE,
DOM_SELECT_PSEUDO_ROOT,
DOM_SELECT_PSEUDO_EMPTY,
};
struct dom_select {
struct dom_select_node *selector;
unsigned long specificity;
enum dom_select_pseudo pseudo;
};
/* The selector syntaxes that can be passed to init_dom_select(). */
enum dom_select_syntax {
DOM_SELECT_SYNTAX_CSS, /* Example: 'p a[id=node] a:hover' */
DOM_SELECT_SYNTAX_PATH, /* Example: '//rss/channel/item' */
};
struct dom_select *init_dom_select(enum dom_select_syntax syntax,
struct dom_string *string);
void done_dom_select(struct dom_select *select);
struct dom_node_list *
select_dom_nodes(struct dom_select *select, struct dom_node *root);
/*
* +------------------------------------------------------------------------------------+
* | Pattern | Meaning | Type | Version |
* |-----------------------+------------------------------+-------------------+---------|
* | * | any element | Universal | 2 |
* | | | selector | |
* |-----------------------+------------------------------+-------------------+---------|
* | E | an element of type E | Type selector | 1 |
* |-----------------------+------------------------------+-------------------+---------|
* | E F | an F element descendant of | Descendant | 1 |
* | | an E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E > F | an F element child of an E | Child combinator | 2 |
* | | element | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E + F | an F element immediately | Direct adjacent | 2 |
* | | preceded by an E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E ~ F | an F element preceded by an | Indirect adjacent | 3 |
* | | E element | combinator | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:root | an E element, root of the | Structural | 3 |
* | | document | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element that has no | Structural | |
* | E:empty | children (including text | pseudo-classes | 3 |
* | | nodes) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:first-child | an E element, first child of | Structural | 2 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:last-child | an E element, last child of | Structural | 3 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:nth-child(n) | an E element, the n-th child | Structural | 3 |
* | | of its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element, the n-th child | Structural | |
* | E:nth-last-child(n) | of its parent, counting from | pseudo-classes | 3 |
* | | the last one | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:first-of-type | an E element, first sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:last-of-type | an E element, last sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:nth-of-type(n) | an E element, the n-th | Structural | 3 |
* | | sibling of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element, the n-th | Structural | |
* | E:nth-last-of-type(n) | sibling of its type, | pseudo-classes | 3 |
* | | counting from the last one | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:only-child | an E element, only child of | Structural | 3 |
* | | its parent | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:only-of-type | an E element, only sibling | Structural | 3 |
* | | of its type | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element being the | | |
* | E:link | source anchor of a hyperlink | The link | |
* | E:visited | of which the target is not | pseudo-classes | 1 |
* | | yet visited (:link) or | | |
* | | already visited (:visited) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:active | an E element during certain | The user action | |
* | E:hover | user actions | pseudo-classes | 1 and 2 |
* | E:focus | | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:target | an E element being the | The target | 3 |
* | | target of the referring URI | pseudo-class | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an element of type E in | | |
* | E:lang(fr) | language "fr" (the document | The :lang() | 2 |
* | FIXME | language specifies how | pseudo-class | |
* | | language is determined) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:enabled | a user interface element E | The UI element | |
* | E:disabled | which is enabled or disabled | states | 3 |
* | | | pseudo-classes | |
* |-----------------------+------------------------------+-------------------+---------|
* | | a user interface element E | | |
* | E:checked | which is checked or in an | The UI element | |
* | E:indeterminate | indeterminate state (for | states | 3 |
* | | instance a radio-button or | pseudo-classes | |
* | | checkbox) | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element containing the | Content | |
* | E:contains("foo") | substring "foo" in its | pseudo-class | 3 |
* | | textual contents | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::first-line | the first formatted line of | The :first-line | 1 |
* | | an E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::first-letter | the first formatted letter | The :first-letter | 1 |
* | | of an E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | | the portion of an E element | The UI element | |
* | E::selection | that is currently | fragments | 3 |
* | | selected/highlighted by the | pseudo-elements | |
* | | user | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::before | generated content before an | The :before | 2 |
* | | E element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | E::after | generated content after an E | The :after | 2 |
* | | element | pseudo-element | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose class is | | |
* | E.warning | "warning" (the document | Class selectors | 1 |
* | | language specifies how class | | |
* | | is determined). | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E#myid | an E element with ID equal | ID selectors | 1 |
* | | to "myid". | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E[foo] | an E element with a "foo" | Attribute | 2 |
* | | attribute | selectors | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo="bar"] | attribute value is exactly | selectors | 2 |
* | | equal to "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | | |
* | | attribute value is a list of | Attribute | |
* | E[foo~="bar"] | space-separated values, one | selectors | 2 |
* | | of which is exactly equal to | | |
* | | "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | | |
* | E[foo^="bar"] | attribute value begins | Attribute | 3 |
* | | exactly with the string | selectors | |
* | | "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo$="bar"] | attribute value ends exactly | selectors | 3 |
* | | with the string "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose "foo" | Attribute | |
* | E[foo*="bar"] | attribute value contains the | selectors | 3 |
* | | substring "bar" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | | an E element whose | | |
* | | "hreflang" attribute has a | Attribute | |
* | E[hreflang|="en"] | hyphen-separated list of | selectors | 2 |
* | | values beginning (from the | | |
* | | left) with "en" | | |
* |-----------------------+------------------------------+-------------------+---------|
* | E:not(s) | an E element that does not | Negation | 3 |
* | FIXME | match simple selector s | pseudo-class | |
* +------------------------------------------------------------------------------------+
*/
#endif

View File

@ -1,7 +1,7 @@
top_builddir=../../..
include $(top_builddir)/Makefile.config
SUBDIRS = html
SUBDIRS = html rss xbel
OBJS = sgml.o parser.o scanner.o
include $(top_srcdir)/Makefile.lib

View File

@ -2,6 +2,8 @@
/* This list is made from http://www.w3.org/TR/REC-html40/index/attributes.html
* unless otherwise noted. */
#undef VERSION
HTML_(ATTRIBUTE, ABBR, 0),
HTML_(ATTRIBUTE, ACCEPT, 0),
HTM2_(ATTRIBUTE, ACCEPT_CHARSET, "ACCEPT-CHARSET", 0),
@ -119,8 +121,7 @@ HTML_(ATTRIBUTE, USEMAP, SGML_ATTRIBUTE_REFERENCE),
HTML_(ATTRIBUTE, VALIGN, 0),
HTML_(ATTRIBUTE, VALUE, 0),
HTML_(ATTRIBUTE, VALUETYPE, 0),
/* XXX: Apparently #VERSION doesn.t work .. some predefined VERSION macro? */
HTM2_(ATTRIBUTE, VERSION, "VERSION", 0),
HTML_(ATTRIBUTE, VERSION, 0),
HTML_(ATTRIBUTE, VISIBILITY, 0),
HTML_(ATTRIBUTE, VLINK, 0),
HTML_(ATTRIBUTE, VSPACE, 0),

View File

@ -71,6 +71,7 @@ HTML_(ELEMENT, PRE, 0),
HTML_(ELEMENT, Q, 0),
HTML_(ELEMENT, S, 0),
HTML_(ELEMENT, SAMP, 0),
HTML_(ELEMENT, SCRIPT, 0),
HTML_(ELEMENT, SELECT, 0),
HTML_(ELEMENT, SMALL, 0),
HTML_(ELEMENT, SPAN, 0),

View File

@ -9,15 +9,8 @@
#include "elinks.h"
#include "document/dom/node.h"
#include "document/dom/stack.h"
#include "document/sgml/html/html.h"
#include "document/sgml/parser.h"
#include "document/sgml/scanner.h"
#include "document/sgml/sgml.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
#include "dom/sgml/html/html.h"
#include "dom/sgml/sgml.h"
#define HTML_(node, name, id) SGML_NODE_INFO(HTML, node, name, id)
@ -27,17 +20,18 @@
static struct sgml_node_info html_attributes[HTML_ATTRIBUTES] = {
SGML_NODE_HEAD(HTML, ATTRIBUTE),
#include "document/sgml/html/attribute.inc"
#include "dom/sgml/html/attribute.inc"
};
static struct sgml_node_info html_elements[HTML_ELEMENTS] = {
SGML_NODE_HEAD(HTML, ELEMENT),
#include "document/sgml/html/element.inc"
#include "dom/sgml/html/element.inc"
};
struct sgml_info sgml_html_info = {
SGML_DOCTYPE_HTML,
html_attributes,
html_elements,
};

View File

@ -1,9 +1,8 @@
#ifndef EL__DOCUMENT_SGML_HTML_HTML_H
#define EL__DOCUMENT_SGML_HTML_HTML_H
#ifndef EL_DOM_SGML_HTML_HTML_H
#define EL_DOM_SGML_HTML_HTML_H
#include "document/dom/stack.h"
#include "document/sgml/sgml.h"
#include "dom/sgml/sgml.h"
extern struct sgml_info sgml_html_info;
@ -14,7 +13,7 @@ extern struct sgml_info sgml_html_info;
enum html_element_type {
HTML_ELEMENT_UNKNOWN,
#include "document/sgml/html/element.inc"
#include "dom/sgml/html/element.inc"
HTML_ELEMENTS,
};
@ -22,7 +21,7 @@ enum html_element_type {
enum html_attribute_type {
HTML_ATTRIBUTE_UNKNOWN,
#include "document/sgml/html/attribute.inc"
#include "dom/sgml/html/attribute.inc"
HTML_ATTRIBUTES,
};

527
src/dom/sgml/parser.c Normal file
View File

@ -0,0 +1,527 @@
/* SGML node handling */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "dom/node.h"
#include "dom/sgml/parser.h"
#include "dom/sgml/scanner.h"
#include "dom/sgml/sgml.h"
#include "dom/stack.h"
#include "dom/string.h"
#include "util/error.h"
#include "util/memory.h"
/* This holds info about a chunk of text being parsed. The SGML parser uses
* these to keep track of possible nested calls to parse_sgml(). This can be
* used to feed output of stuff like ECMAScripts document.write() from
* <script>-elements back to the SGML parser. */
struct sgml_parsing_state {
/* Scanner over the chunk; initialized in sgml_parsing_push(). */
struct dom_scanner scanner;
/* NOTE(review): never assigned in the code visible in this file --
* confirm whether it is still needed. */
struct dom_node *node;
/* Depth of the main parser stack when this state was pushed;
* sgml_parsing_pop() unwinds the main stack back to it. */
size_t depth;
};
static struct sgml_parsing_state *
init_sgml_parsing_state(struct sgml_parser *parser, struct dom_string *buffer);
/* When getting the sgml_parser struct it is _always_ assumed that the parser
* is the first to add its context, which it is since it initializes the
* stack. */
/* Yields the data pointer of the first stack context (the sgml_parser that
* init_sgml_parser() registered). */
#define get_sgml_parser(stack) ((stack)->contexts[0]->data)
/* Yields the per-state data of the first stack context for @state.
* NOTE(review): @stack is not parenthesized in the expansion, so pass a plain
* identifier. */
#define get_sgml_parser_state(stack, state) \
get_dom_stack_state_data(stack->contexts[0], state)
/* Functions for adding new nodes to the DOM tree: */
/* They wrap init_dom_node() and add_dom_*() and set up of additional
* information like node subtypes and SGML parser state information. */
/* Create the document node named by @string and push it on @stack.
 * Returns the result of push_dom_node(), or NULL if the node could not be
 * created. */
static inline struct dom_node *
add_sgml_document(struct dom_stack *stack, struct dom_string *string)
{
	struct dom_node *document = init_dom_node(DOM_NODE_DOCUMENT, string);

	if (!document)
		return NULL;

	return push_dom_node(stack, document);
}
/* Add an element node for @token below the node on top of @stack, push it and
 * record its node info in the parser state of the new stack entry.
 * Returns the new node, or NULL if it could not be added or pushed. */
static inline struct dom_node *
add_sgml_element(struct dom_stack *stack, struct dom_scanner_token *token)
{
	struct sgml_parser *parser = get_sgml_parser(stack);
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_node *node = add_dom_element(parent, &token->string);
	struct sgml_node_info *element_info;
	struct sgml_parser_state *parser_state;
	struct dom_stack_state *state;

	if (!node)
		return NULL;

	/* Resolve the element subtype before pushing, so that it is already
	 * set when the push takes place. */
	element_info = get_sgml_node_info(parser->info->elements, node);
	node->data.element.type = element_info->type;

	if (!push_dom_node(stack, node))
		return NULL;

	state = get_dom_stack_top(stack);
	assert(node == state->node);

	parser_state = get_sgml_parser_state(stack, state);
	parser_state->info = element_info;

	return node;
}
/* Add an attribute node named by @token to the node on top of @stack.
 *
 * @valtoken is the attribute value token, or NULL when the attribute has no
 * (valid) value. The attribute's type and its id/reference flags are taken
 * from the SGML node info of the current document type; `quoted' is set when
 * the value was scanned as a SGML_TOKEN_STRING.
 *
 * The node is pushed and immediately popped again. If the node cannot be
 * created, nothing is touched. */
static inline void
add_sgml_attribute(struct dom_stack *stack,
		   struct dom_scanner_token *token, struct dom_scanner_token *valtoken)
{
	struct sgml_parser *parser = get_sgml_parser(stack);
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_string *value = valtoken ? &valtoken->string : NULL;
	struct sgml_node_info *info;
	struct dom_node *node;

	node = add_dom_attribute(parent, &token->string, value);
	/* Bail out before looking at the node: it was previously dereferenced
	 * ahead of the NULL check. */
	if (!node) return;

	info = get_sgml_node_info(parser->info->attributes, node);

	node->data.attribute.type      = info->type;
	node->data.attribute.id	       = !!(info->flags & SGML_ATTRIBUTE_IDENTIFIER);
	node->data.attribute.reference = !!(info->flags & SGML_ATTRIBUTE_REFERENCE);

	if (valtoken && valtoken->type == SGML_TOKEN_STRING)
		node->data.attribute.quoted = 1;

	/* Only pop what was actually pushed. */
	if (push_dom_node(stack, node))
		pop_dom_node(stack);
}
/* Add a processing instruction node for @target (with optional @data) below
 * the node on top of @stack and push it.
 * Only SGML_TOKEN_PROCESS_XML targets get the XML subtype; every other target
 * type becomes a plain DOM_PROC_INSTRUCTION.
 * Returns the result of push_dom_node(), or NULL if the node could not be
 * added. */
static inline struct dom_node *
add_sgml_proc_instruction(struct dom_stack *stack, struct dom_scanner_token *target,
			  struct dom_scanner_token *data)
{
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_string *payload = data ? &data->string : NULL;
	struct dom_node *instruction;

	instruction = add_dom_proc_instruction(parent, &target->string, payload);
	if (!instruction)
		return NULL;

	if (target->type == SGML_TOKEN_PROCESS_XML)
		instruction->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML;
	else
		instruction->data.proc_instruction.type = DOM_PROC_INSTRUCTION;

	return push_dom_node(stack, instruction);
}
/* Add a leaf node of @type holding the string of @token below the node on top
 * of @stack. The node is pushed and popped right away; it is flagged as
 * whitespace-only when the token is a SGML_TOKEN_SPACE. */
static inline void
add_sgml_node(struct dom_stack *stack, enum dom_node_type type, struct dom_scanner_token *token)
{
	struct dom_node *parent = get_dom_stack_top(stack)->node;
	struct dom_node *child = add_dom_node(parent, type, &token->string);

	if (!child)
		return;

	if (token->type == SGML_TOKEN_SPACE)
		child->data.text.only_space = 1;

	if (!push_dom_node(stack, child))
		return;

	pop_dom_node(stack);
}
/* SGML parser main handling: */
/* Consume attribute tokens from @scanner and add them as attribute nodes to
* the node on top of @stack.
*
* On entry the current token must be a SGML_TOKEN_ELEMENT_BEGIN (which is
* skipped), or the top of @stack must be a processing instruction node.
* Scanning stops after a tag-end token has been skipped, at the next
* element-related token (which is left for the caller), or when the scanner
* runs out of tokens. */
static inline void
parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
{
struct dom_scanner_token name;
assert(dom_scanner_has_tokens(scanner)
&& (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
|| (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION)));
if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN)
skip_dom_scanner_token(scanner);
while (dom_scanner_has_tokens(scanner)) {
struct dom_scanner_token *token = get_dom_scanner_token(scanner);
assert(token);
switch (token->type) {
case SGML_TOKEN_TAG_END:
skip_dom_scanner_token(scanner);
/* and return */
/* (deliberate fall-through into the return below) */
case SGML_TOKEN_ELEMENT:
case SGML_TOKEN_ELEMENT_BEGIN:
case SGML_TOKEN_ELEMENT_END:
case SGML_TOKEN_ELEMENT_EMPTY_END:
return;
case SGML_TOKEN_IDENT:
/* Keep a copy of the name token; @token is reused below for
* the lookahead. */
copy_struct(&name, token);
/* Skip the attribute name token */
token = get_next_dom_scanner_token(scanner);
if (token && token->type == '=') {
/* If the token is not a valid value token
* ignore it. */
token = get_next_dom_scanner_token(scanner);
if (token
&& token->type != SGML_TOKEN_IDENT
&& token->type != SGML_TOKEN_ATTRIBUTE
&& token->type != SGML_TOKEN_STRING)
token = NULL;
} else {
/* No '=' follows: the attribute has no value. */
token = NULL;
}
add_sgml_attribute(stack, &name, token);
/* Skip the value token */
if (token)
skip_dom_scanner_token(scanner);
break;
default:
/* Anything else inside the tag is ignored. */
skip_dom_scanner_token(scanner);
}
}
}
/* Main parser loop: consume every token @scanner can deliver and turn it into
 * DOM nodes added below (and pushed on / popped from) @stack.
 *
 *  - element (begin) tokens push a new element and, for begin tokens, parse
 *    its attributes;
 *  - element end tokens pop either the top node (empty end tag name) or the
 *    matching element found by search_dom_stack();
 *  - comments, CDATA sections, entity references and text become leaf nodes;
 *  - notations are skipped;
 *  - processing instructions are added, and for <?xml ...?> and
 *    <?xml-stylesheet ...?> the data part is scanned again as attributes. */
static void
parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
{
	struct dom_scanner_token target;

	while (dom_scanner_has_tokens(scanner)) {
		struct dom_scanner_token *token = get_dom_scanner_token(scanner);

		switch (token->type) {
		case SGML_TOKEN_ELEMENT:
		case SGML_TOKEN_ELEMENT_BEGIN:
			if (!add_sgml_element(stack, token)) {
				/* The element could not be added: drop the
				 * whole tag, including any attributes. */
				if (token->type == SGML_TOKEN_ELEMENT) {
					skip_dom_scanner_token(scanner);
					break;
				}

				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
				break;
			}

			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
				parse_sgml_attributes(stack, scanner);
			} else {
				skip_dom_scanner_token(scanner);
			}

			break;

		case SGML_TOKEN_ELEMENT_EMPTY_END:
			pop_dom_node(stack);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_ELEMENT_END:
			if (!token->string.length) {
				/* An end tag without a name closes the top node. */
				pop_dom_node(stack);
			} else {
				struct dom_string string;
				struct dom_stack_state *state;

				set_dom_string(&string, token->string.string, token->string.length);
				state = search_dom_stack(stack, DOM_NODE_ELEMENT,
							 &string);
				if (state) {
					struct sgml_parser_state *pstate;

					/* Remember the end tag token before the
					 * state is popped. */
					pstate = get_sgml_parser_state(stack, state);
					copy_struct(&pstate->end_token, token);

					pop_dom_state(stack, state);
				}
			}
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_NOTATION_COMMENT:
			add_sgml_node(stack, DOM_NODE_COMMENT, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_NOTATION_ATTLIST:
		case SGML_TOKEN_NOTATION_DOCTYPE:
		case SGML_TOKEN_NOTATION_ELEMENT:
		case SGML_TOKEN_NOTATION_ENTITY:
		case SGML_TOKEN_NOTATION:
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_CDATA_SECTION:
			add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_PROCESS_XML_STYLESHEET:
		case SGML_TOKEN_PROCESS_XML:
		case SGML_TOKEN_PROCESS:
			/* The target token is overwritten by the lookahead
			 * below, so keep a copy. */
			copy_struct(&target, token);

			/* Skip the target token */
			token = get_next_dom_scanner_token(scanner);
			if (!token) break;

			assert(token->type == SGML_TOKEN_PROCESS_DATA);

			if (add_sgml_proc_instruction(stack, &target, token)) {
				if ((target.type == SGML_TOKEN_PROCESS_XML
				     || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
				    && token->string.length > 0) {
					/* Parse the <?xml data="attributes"?>. */
					struct dom_scanner attr_scanner;

					init_dom_scanner(&attr_scanner, &sgml_scanner_info,
							 &token->string, SGML_STATE_ELEMENT,
							 scanner->count_lines);

					if (dom_scanner_has_tokens(&attr_scanner))
						parse_sgml_attributes(stack, &attr_scanner);
				}

				/* Only pop when the instruction was really
				 * pushed; popping after a failed push would
				 * remove the enclosing node instead. */
				pop_dom_node(stack);
			}

			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_ENTITY:
			add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_SPACE:
		case SGML_TOKEN_TEXT:
		default:
			add_sgml_node(stack, DOM_NODE_TEXT, token);
			skip_dom_scanner_token(scanner);
		}
	}
}
/* Parse the SGML source in @buffer with @parser.
 *
 * The document root is created (and its stack state made immutable) on the
 * first call. Each call pushes its own parsing state for @buffer and pops it
 * again when done.
 * Returns the document root, or NULL if the root or the parsing state could
 * not be set up. */
struct dom_node *
parse_sgml(struct sgml_parser *parser, struct dom_string *buffer)
{
	struct sgml_parsing_state *parsing;

	if (!parser->root) {
		struct dom_node *document;

		document = add_sgml_document(&parser->stack, &parser->uri);
		parser->root = document;
		if (!document)
			return NULL;

		get_dom_stack_top(&parser->stack)->immutable = 1;
	}

	parsing = init_sgml_parsing_state(parser, buffer);
	if (!parsing)
		return NULL;

	/* FIXME: Make parse_sgml_plain() return something (error code or if
	 * can be guaranteed a root node). */
	parse_sgml_plain(&parser->stack, &parsing->scanner);

	pop_dom_node(&parser->parsing);

	return parser->root;
}
/* Parsing state management: */
/* The SGML parser can handle nested calls to parse_sgml(). This can be used to
* handle output of external processing of data in the document tree. For
* example, this allows the output of document.write() from the DOM scripting
* interface to be parsed. */
static void
sgml_parsing_push(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct sgml_parser *parser = get_sgml_parser(stack);
struct sgml_parsing_state *parsing = data;
parsing->depth = parser->stack.depth;
get_dom_stack_top(&parser->stack)->immutable = 1;
init_dom_scanner(&parsing->scanner, &sgml_scanner_info, &node->string,
SGML_STATE_TEXT, 0);
}
static void
sgml_parsing_pop(struct dom_stack *stack, struct dom_node *node, void *data)
{
struct sgml_parser *parser = get_sgml_parser(stack);
struct sgml_parsing_state *parsing = data;
/* Pop the stack back to the state it was in. This includes cleaning
* away even immutable states left on the stack. */
while (parsing->depth < parser->stack.depth) {
get_dom_stack_top(&parser->stack)->immutable = 0;
pop_dom_node(&parser->stack);
}
assert(parsing->depth == parser->stack.depth);
}
/* Stack context for the `parsing' stack of the parser (registered in
* init_sgml_parser()). Every state carries a struct sgml_parsing_state, and
* only text nodes have callbacks: they set up and tear down one parsing
* state. All other node types have no callback. */
static struct dom_stack_context_info sgml_parsing_context_info = {
/* Object size: */ sizeof(struct sgml_parsing_state),
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ sgml_parsing_push,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ sgml_parsing_pop,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
/* Create a new parsing state by pushing a new text node containing the
 * @buffer on the parsing stack of @parser.
 * Returns the state data of the new top of the parsing stack, or NULL if the
 * node could not be created or pushed. */
static struct sgml_parsing_state *
init_sgml_parsing_state(struct sgml_parser *parser, struct dom_string *buffer)
{
	struct dom_node *text = init_dom_node(DOM_NODE_TEXT, buffer);
	struct dom_stack_state *top;

	if (!text)
		return NULL;

	if (!push_dom_node(&parser->parsing, text))
		return NULL;

	top = get_dom_stack_top(&parser->parsing);

	return get_dom_stack_state_data(parser->parsing.contexts[0], top);
}
/* Parser creation and destruction: */
/* FIXME: For now the main SGML parser context doesn't do much other than
* declaring the sgml_parser_state object. */
/* Stack context for the main parser stack (registered in init_sgml_parser()):
* reserves a struct sgml_parser_state per stack state and installs no push or
* pop callbacks. */
static struct dom_stack_context_info sgml_parser_context_info = {
/* Object size: */ sizeof(struct sgml_parser_state),
/* Push: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
},
/* Pop: */
{
/* */ NULL,
/* DOM_NODE_ELEMENT */ NULL,
/* DOM_NODE_ATTRIBUTE */ NULL,
/* DOM_NODE_TEXT */ NULL,
/* DOM_NODE_CDATA_SECTION */ NULL,
/* DOM_NODE_ENTITY_REFERENCE */ NULL,
/* DOM_NODE_ENTITY */ NULL,
/* DOM_NODE_PROC_INSTRUCTION */ NULL,
/* DOM_NODE_COMMENT */ NULL,
/* DOM_NODE_DOCUMENT */ NULL,
/* DOM_NODE_DOCUMENT_TYPE */ NULL,
/* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
/* DOM_NODE_NOTATION */ NULL,
}
};
struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
struct dom_string *uri)
{
struct sgml_parser *parser;
enum dom_stack_flag flags = 0;
parser = mem_calloc(1, sizeof(*parser));
if (!parser) return NULL;
if (!init_dom_string(&parser->uri, uri->string, uri->length)) {
mem_free(parser);
return NULL;
}
parser->type = type;
parser->info = get_sgml_info(doctype);
if (type == SGML_PARSER_TREE)
flags |= DOM_STACK_KEEP_NODES;
init_dom_stack(&parser->stack, flags);
/* FIXME: Some sgml backend specific callbacks? Handle HTML script tags,
* and feed document.write() data back to the parser. */
add_dom_stack_context(&parser->stack, parser, &sgml_parser_context_info);
/* Don't keep the 'fake' text nodes that holds the parsing data. */
init_dom_stack(&parser->parsing, 0);
add_dom_stack_context(&parser->parsing, parser, &sgml_parsing_context_info);
return parser;
}
/* Release everything owned by @parser: both stacks, the copied URI string and
* the parser object itself. @parser must not be used afterwards. */
void
done_sgml_parser(struct sgml_parser *parser)
{
done_dom_stack(&parser->stack);
done_dom_stack(&parser->parsing);
done_dom_string(&parser->uri);
mem_free(parser);
}

55
src/dom/sgml/parser.h Normal file
View File

@ -0,0 +1,55 @@
#ifndef EL_DOM_SGML_PARSER_H
#define EL_DOM_SGML_PARSER_H
#include "dom/node.h"
#include "dom/stack.h"
#include "dom/sgml/sgml.h"
#include "dom/scanner.h"
struct string;
struct uri;
/* Selects how the parser treats the nodes it creates; passed to
* init_sgml_parser(). */
enum sgml_parser_type {
/* The first one is a DOM tree builder. */
SGML_PARSER_TREE,
/* The second one will simply push nodes on the stack, not building a
* DOM tree. This interface is similar to that of SAX (Simple API for
* XML) where events are fired when nodes are entered and exited. It is
* useful when you are not actually interested in the DOM tree, but can
* do all processing in a stream-like manner, such as when highlighting
* HTML code. */
SGML_PARSER_STREAM,
};
/* Per-state data the SGML parser attaches to every entry of its main DOM
* stack. */
struct sgml_parser_state {
/* Info about the properties of the node contained by state.
* This is only meaningful to element and attribute nodes. For
* unknown nodes it points to the common 'unknown node' info. */
struct sgml_node_info *info;
/* This is used by the DOM source renderer for highlighting the
* end-tag of an element. */
struct dom_scanner_token end_token;
};
/* The SGML parser object; created by init_sgml_parser() and released by
* done_sgml_parser(). */
struct sgml_parser {
enum sgml_parser_type type; /* Stream or tree */
struct sgml_info *info; /* Backend dependent info */
struct dom_string uri; /* The URI of the DOM document */
struct dom_node *root; /* The document root node */
struct dom_stack stack; /* A stack for tracking parsed nodes */
struct dom_stack parsing; /* Used for tracking parsing states */
};
struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
struct dom_string *uri);
void done_sgml_parser(struct sgml_parser *parser);
struct dom_node *parse_sgml(struct sgml_parser *parser, struct dom_string *buffer);
#endif

View File

@ -0,0 +1,6 @@
top_builddir=../../../..
include $(top_builddir)/Makefile.config
OBJS = rss.o
include $(top_srcdir)/Makefile.lib

View File

@ -0,0 +1,3 @@
/* RSS attributes */
RSS_(ATTRIBUTE, ISPERMALINK, 0),

View File

@ -0,0 +1,10 @@
/* RSS elements */
RSS_(ELEMENT, AUTHOR, 0),
RSS_(ELEMENT, CHANNEL, 0),
RSS_(ELEMENT, DESCRIPTION, 0),
RSS_(ELEMENT, GUID, 0),
RSS_(ELEMENT, ITEM, 0),
RSS_(ELEMENT, LINK, 0),
RSS_(ELEMENT, PUBDATE, 0),
RSS_(ELEMENT, TITLE, 0),

35
src/dom/sgml/rss/rss.c Normal file
View File

@ -0,0 +1,35 @@
/* SGML node handling */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "dom/sgml/rss/rss.h"
#include "dom/sgml/sgml.h"
/* Each RSS_() entry of the included .inc lists expands through
* SGML_NODE_INFO() (defined outside this file) into one table entry. */
#define RSS_(node, name, id) SGML_NODE_INFO(RSS, node, name, id)
/* Node info table for RSS attributes; starts with the SGML_NODE_HEAD() entry
* followed by the entries from attribute.inc. */
static struct sgml_node_info rss_attributes[RSS_ATTRIBUTES] = {
SGML_NODE_HEAD(RSS, ATTRIBUTE),
#include "dom/sgml/rss/attribute.inc"
};
/* Node info table for RSS elements; same layout as the attribute table. */
static struct sgml_node_info rss_elements[RSS_ELEMENTS] = {
SGML_NODE_HEAD(RSS, ELEMENT),
#include "dom/sgml/rss/element.inc"
};
/* The RSS document type description: doctype id, attribute table and element
* table, in that order. */
struct sgml_info sgml_rss_info = {
SGML_DOCTYPE_RSS,
rss_attributes,
rss_elements,
};

28
src/dom/sgml/rss/rss.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef EL_DOM_SGML_RSS_RSS_H
#define EL_DOM_SGML_RSS_RSS_H
#include "dom/sgml/sgml.h"
extern struct sgml_info sgml_rss_info;
/* While the enums below are declared, each RSS_() entry of the .inc lists
* expands through SGML_NODE_INFO_TYPE() (defined outside this file);
* presumably to one enumerator per entry -- confirm against sgml.h. The flags
* argument is ignored here. */
#define RSS_(node, name, flags) SGML_NODE_INFO_TYPE(RSS, node, name)
/* RSS element types. RSS_ELEMENTS comes last and is used as the size of the
* rss_elements table. */
enum rss_element_type {
RSS_ELEMENT_UNKNOWN,
#include "dom/sgml/rss/element.inc"
RSS_ELEMENTS,
};
/* RSS attribute types. RSS_ATTRIBUTES comes last and is used as the size of
* the rss_attributes table. */
enum rss_attribute_type {
RSS_ATTRIBUTE_UNKNOWN,
#include "dom/sgml/rss/attribute.inc"
RSS_ATTRIBUTES,
};
/* The helper is only needed for the two enums above. */
#undef RSS_
#endif

519
src/dom/sgml/scanner.c Normal file
View File

@ -0,0 +1,519 @@
/* SGML token scanner utilities */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include "elinks.h"
#include "dom/scanner.h"
#include "dom/sgml/scanner.h"
#include "dom/string.h"
#include "util/error.h"
/* Bitmap entries for the SGML character groups used in the scanner table */
/* A character can belong to several groups. Bit 0 (1 << 0) is not assigned
* by this enum. */
enum sgml_char_group {
SGML_CHAR_ENTITY = (1 << 1),
SGML_CHAR_IDENT = (1 << 2),
SGML_CHAR_NEWLINE = (1 << 3),
SGML_CHAR_WHITESPACE = (1 << 4),
SGML_CHAR_NOT_TEXT = (1 << 5),
SGML_CHAR_NOT_ATTRIBUTE = (1 << 6),
};
/* Description of which characters belong to which sgml_char_group bits;
* referenced by sgml_scanner_info. */
static struct dom_scan_table_info sgml_scan_table_info[] = {
DOM_SCAN_TABLE_RANGE("0", '9', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_RANGE("A", 'Z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_RANGE("a", 'z', SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
/* For the octal number impaired (me including) \241 is 161 --jonas */
DOM_SCAN_TABLE_RANGE("\241", 255, SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_STRING("-_:.", SGML_CHAR_IDENT | SGML_CHAR_ENTITY),
/* '#' may appear in entity references but not in identifiers. */
DOM_SCAN_TABLE_STRING("#", SGML_CHAR_ENTITY),
DOM_SCAN_TABLE_STRING(" \f\n\r\t\v", SGML_CHAR_WHITESPACE),
DOM_SCAN_TABLE_STRING("\f\n", SGML_CHAR_NEWLINE),
DOM_SCAN_TABLE_STRING("<&", SGML_CHAR_NOT_TEXT),
DOM_SCAN_TABLE_STRING("<=>", SGML_CHAR_NOT_ATTRIBUTE),
DOM_SCAN_TABLE_END,
};
/* Build one string mapping entry: the string @str, the token type it maps to
* and the token family (base type) it belongs to. */
#define SGML_STRING_MAP(str, type, family) \
{ INIT_DOM_STRING(str, -1), SGML_TOKEN_##type, SGML_TOKEN_##family }
/* Names recognized after '<!' (notation family) and '<?' (process family);
* looked up with map_dom_scanner_string() in scan_sgml_element_token(). */
static struct dom_scanner_string_mapping sgml_string_mappings[] = {
SGML_STRING_MAP("--", NOTATION_COMMENT, NOTATION),
SGML_STRING_MAP("ATTLIST", NOTATION_ATTLIST, NOTATION),
SGML_STRING_MAP("DOCTYPE", NOTATION_DOCTYPE, NOTATION),
SGML_STRING_MAP("ELEMENT", NOTATION_ELEMENT, NOTATION),
SGML_STRING_MAP("ENTITY", NOTATION_ENTITY, NOTATION),
SGML_STRING_MAP("xml", PROCESS_XML, PROCESS),
SGML_STRING_MAP("xml-stylesheet", PROCESS_XML_STYLESHEET, PROCESS),
DOM_STRING_MAP_END,
};
/* Defined at the end of this file; declared here so it can be referenced by
* the scanner info below. */
static struct dom_scanner_token *scan_sgml_tokens(struct dom_scanner *scanner);
/* The SGML scanner backend: string mappings, character table description and
* the token scanning function, in that order. */
struct dom_scanner_info sgml_scanner_info = {
sgml_string_mappings,
sgml_scan_table_info,
scan_sgml_tokens,
};
/* Non-zero if character @c has any of the group bits in @bit set in the SGML
 * scanner table. */
#define check_sgml_table(c, bit)	(sgml_scanner_info.scan_table[(c)] & (bit))

/* Advance @s while it is before the end of @scanner and points at a character
 * in the group(s) @bit. Wrapped in do { } while (0) so that it behaves as a
 * single statement, also in unbraced if/else bodies. */
#define scan_sgml(scanner, s, bit)					\
	do {								\
		while ((s) < (scanner)->end && check_sgml_table(*(s), bit)) \
			(s)++;						\
	} while (0)

/* Character class predicates. The former is_sgml_token_start() was dropped:
 * it was unused and tested SGML_CHAR_TOKEN_START, a bit that enum
 * sgml_char_group does not define. */
#define is_sgml_ident(c)	check_sgml_table(c, SGML_CHAR_IDENT)
#define is_sgml_entity(c)	check_sgml_table(c, SGML_CHAR_ENTITY)
#define is_sgml_space(c)	check_sgml_table(c, SGML_CHAR_WHITESPACE)
#define is_sgml_newline(c)	check_sgml_table(c, SGML_CHAR_NEWLINE)
#define is_sgml_text(c)		!check_sgml_table(c, SGML_CHAR_NOT_TEXT)
#define is_sgml_attribute(c)	!check_sgml_table(c, SGML_CHAR_NOT_ATTRIBUTE | SGML_CHAR_WHITESPACE)
/* Advance *@string past any whitespace, stopping at the end of @scanner.
 * When the scanner counts lines, every newline character skipped bumps its
 * line number. */
static inline void
skip_sgml_space(struct dom_scanner *scanner, unsigned char **string)
{
	unsigned char *cursor = *string;

	if (scanner->count_lines) {
		for (; cursor < scanner->end && is_sgml_space(*cursor); cursor++) {
			if (is_sgml_newline(*cursor))
				scanner->lineno++;
		}
	} else {
		scan_sgml(scanner, cursor, SGML_CHAR_WHITESPACE);
	}

	*string = cursor;
}
/* Text token scanning */
/* I think it is faster to not check the table here --jonas */
/* Loop header that advances @str up to the next '<' or '&' or the end of
* @scanner; the statement following the macro is the loop body. */
#define foreach_sgml_cdata(scanner, str) \
for (; ((str) < (scanner)->end && *(str) != '<' && *(str) != '&'); (str)++)
/* Scan one token in text state, starting at the scanner position, and store
* it in @token:
* - '&' followed by entity characters gives a SGML_TOKEN_ENTITY whose string
* excludes the '&' and anything after the entity characters;
* - any other '&' gives SGML_TOKEN_GARBAGE;
* - leading whitespace followed by '<', '&' or the end gives SGML_TOKEN_SPACE;
* - everything else gives SGML_TOKEN_TEXT.
* The scanner position is moved past the consumed text. */
static inline void
scan_sgml_text_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum sgml_token_type type = SGML_TOKEN_GARBAGE;
/* -1 means: use the full scanned span as the token length. */
int real_length = -1;
/* In scan_sgml_tokens() we check that first_char != '<' */
assert(first_char != '<' && scanner->state == SGML_STATE_TEXT);
token->string.string = string++;
if (first_char == '&') {
/* NOTE(review): *string is read without a check against
* scanner->end; presumably the buffer is readable one past the
* end -- confirm. */
if (is_sgml_entity(*string)) {
scan_sgml(scanner, string, SGML_CHAR_ENTITY);
type = SGML_TOKEN_ENTITY;
/* Drop the '&' from the token string. */
token->string.string++;
real_length = string - token->string.string;
}
/* Consume up to and including a terminating ';', but never go
* past a '<' or '&'. */
foreach_sgml_cdata (scanner, string) {
if (*string == ';') {
string++;
break;
}
}
} else {
if (is_sgml_space(first_char)) {
skip_sgml_space(scanner, &string);
/* Whitespace followed by real text is part of that text. */
type = string < scanner->end && is_sgml_text(*string)
? SGML_TOKEN_TEXT : SGML_TOKEN_SPACE;
} else {
type = SGML_TOKEN_TEXT;
}
foreach_sgml_cdata (scanner, string) {
/* m33p */;
}
}
token->type = type;
token->string.length = real_length >= 0 ? real_length : string - token->string.string;
token->precedence = get_sgml_precedence(type);
scanner->position = string;
}
/* Element scanning */
/* Check whether it is safe to skip a token of @type when looking for @skipto:
 * returns 1 when the precedence of @type does not exceed that of @skipto,
 * else 0. */
static inline int
check_sgml_precedence(int type, int skipto)
{
	if (get_sgml_precedence(type) > get_sgml_precedence(skipto))
		return 0;

	return 1;
}
/* Skip until @skipto is found, without taking precedence into account.
 * Returns a pointer to the first @skipto at or after @string, or NULL if
 * there is none before the end of @scanner. */
static inline unsigned char *
skip_sgml_chars(struct dom_scanner *scanner, unsigned char *string,
		unsigned char skipto)
{
	unsigned char *cursor;
	int pending_lines = 0;

	assert(string >= scanner->position && string <= scanner->end);

	/* Without line counting a plain memory search is enough. */
	if (!scanner->count_lines)
		return memchr(string, skipto, scanner->end - string);

	for (cursor = string; cursor < scanner->end; cursor++) {
		if (is_sgml_newline(*cursor))
			pending_lines++;

		if (*cursor != skipto)
			continue;

		/* Only count newlines if we actually find the requested
		 * char. Else callers are assumed to discard the scanning. */
		scanner->lineno += pending_lines;
		return cursor;
	}

	return NULL;
}
/* XXX: Only element or ``in tag'' precedence is handled correctly however
* using this function for CDATA or text would be overkill. */
/* Look for the character @skipto starting at *@string.
* If it is found, *@string is set just past it and a pointer to it is
* returned. Otherwise NULL is returned and *@string is left where the search
* stopped: at the end of @scanner, or at the first character whose precedence
* forbids skipping it. With @check_quoting set, quoted strings are stepped
* over as a whole when their closing quote exists. */
static inline unsigned char *
skip_sgml(struct dom_scanner *scanner, unsigned char **string, unsigned char skipto,
int check_quoting)
{
unsigned char *pos = *string;
for (; pos < scanner->end; pos++) {
if (*pos == skipto) {
*string = pos + 1;
return pos;
}
/* The raw characters are used as token types here. */
if (!check_sgml_precedence(*pos, skipto))
break;
if (check_quoting && isquote(*pos)) {
unsigned char *end;
/* Jump to the matching quote; an unterminated quote is
* treated like an ordinary character. */
end = skip_sgml_chars(scanner, pos + 1, *pos);
if (end) pos = end;
} else if (scanner->count_lines && is_sgml_newline(*pos)) {
scanner->lineno++;
}
}
*string = pos;
return NULL;
}
/* Find the end of a comment whose content starts at *@string (just after
 * '<!--'). *@string is moved past the closing '-->' and the length of the
 * comment content is returned. If the comment is not terminated, *@string is
 * set to the end of @scanner and the length covers everything up to it. */
static inline int
skip_sgml_comment(struct dom_scanner *scanner, unsigned char **string)
{
	unsigned char *start = *string;
	unsigned char *pos = start;
	int length = 0;

	while ((pos = skip_sgml_chars(scanner, pos, '>'))) {
		/* It is always safe to access index -2 and -1 here since we
		 * are supposed to have '<!--' before this is called. We do
		 * however need to check that the '-->' are not overlapping
		 * any preceding '-'. */
		if (pos[-2] == '-' && pos[-1] == '-' && &pos[-2] >= start) {
			length = pos - start - 2;
			pos++;
			break;
		}

		/* Not the end of the comment; resume after this '>'. */
		pos++;
	}

	if (!pos) {
		pos = scanner->end;
		length = pos - start;
	}

	*string = pos;

	return length;
}
/* Find the end of a CDATA section whose content starts at *@string (just
 * after '<![CDATA['). *@string is moved past the closing ']]>' and the length
 * of the section content is returned. If the section is not terminated,
 * *@string is set to the end of @scanner and the length covers everything up
 * to it. */
static inline int
skip_sgml_cdata_section(struct dom_scanner *scanner, unsigned char **string)
{
	unsigned char *start = *string;
	unsigned char *pos = start;
	int length = 0;

	while ((pos = skip_sgml_chars(scanner, pos, '>'))) {
		/* It is always safe to access index -2 and -1 here since we
		 * are supposed to have '<![CDATA[' before this is called. */
		if (pos[-2] == ']' && pos[-1] == ']') {
			length = pos - start - 2;
			pos++;
			break;
		}

		/* Not the end of the section; resume after this '>'. */
		pos++;
	}

	if (!pos) {
		pos = scanner->end;
		length = pos - start;
	}

	*string = pos;

	return length;
}
/* Advance @str over characters that may be part of an unquoted attribute
* value, stopping at the end of @scanner. The expansion already ends in ';'
* and is a bare while statement, so only use it as a statement of its own. */
#define scan_sgml_attribute(scanner, str) \
while ((str) < (scanner)->end && is_sgml_attribute(*(str))) \
(str)++;
/* Scan one token in element state, or one token starting with '<', from the
* scanner position and store it in @token. The scanner state is switched
* between text, element and processing instruction state as tags are opened
* and closed, and the scanner position is moved past the consumed text.
*
* NOTE(review): several branches read *string (or string[0]) without first
* checking it against scanner->end; presumably the buffer is readable one
* past the end -- confirm. */
static inline void
scan_sgml_element_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{
unsigned char *string = scanner->position;
unsigned char first_char = *string;
enum sgml_token_type type = SGML_TOKEN_GARBAGE;
/* -1 means: use the full scanned span as the token length. */
int real_length = -1;
token->string.string = string++;
if (first_char == '<') {
skip_sgml_space(scanner, &string);
if (scanner->state == SGML_STATE_ELEMENT) {
/* Already inside an element so insert a tag end token
* and continue scanning in next iteration. */
string--;
real_length = 0;
type = SGML_TOKEN_TAG_END;
scanner->state = SGML_STATE_TEXT;
} else if (is_sgml_ident(*string)) {
/* Start tag: the token string is the element name. */
token->string.string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string.string;
skip_sgml_space(scanner, &string);
if (*string == '>') {
/* <name> without attributes is one complete token. */
type = SGML_TOKEN_ELEMENT;
string++;
} else {
/* Attributes (or the tag end) follow as separate tokens. */
scanner->state = SGML_STATE_ELEMENT;
type = SGML_TOKEN_ELEMENT_BEGIN;
}
} else if (*string == '!') {
unsigned char *ident;
enum sgml_token_type base = SGML_TOKEN_NOTATION;
string++;
skip_sgml_space(scanner, &string);
token->string.string = ident = string;
if (string + 1 < scanner->end
&& string[0] == '-' && string[1] == '-') {
/* Comment: the token string is the comment content. */
string += 2;
type = SGML_TOKEN_NOTATION_COMMENT;
token->string.string = string;
real_length = skip_sgml_comment(scanner, &string);
assert(real_length >= 0);
} else if (string + 6 < scanner->end
&& !memcmp(string, "[CDATA[", 7)) {
/* CDATA section: the token string is the section content. */
string += 7;
type = SGML_TOKEN_CDATA_SECTION;
token->string.string = string;
real_length = skip_sgml_cdata_section(scanner, &string);
assert(real_length >= 0);
} else {
/* Other notation: map the span [ident, string) to a
* notation type, then skip to the closing '>'.
* NOTE(review): string still equals ident at the mapping
* call unless whitespace follows (a second space skip, no
* identifier scan) -- confirm the empty span is intended. */
skip_sgml_space(scanner, &string);
type = map_dom_scanner_string(scanner, ident, string, base);
skip_sgml(scanner, &string, '>', 0);
}
} else if (*string == '?') {
unsigned char *pos;
enum sgml_token_type base = SGML_TOKEN_PROCESS;
/* Processing instruction: this token is the target name;
* the data is scanned as a separate token in processing
* instruction state. */
string++;
skip_sgml_space(scanner, &string);
token->string.string = pos = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = map_dom_scanner_string(scanner, pos, string, base);
scanner->state = SGML_STATE_PROC_INST;
} else if (*string == '/') {
string++;
skip_sgml_space(scanner, &string);
if (is_sgml_ident(*string)) {
/* </name ...>: the token string is the element name. */
token->string.string = string;
scan_sgml(scanner, string, SGML_CHAR_IDENT);
real_length = string - token->string.string;
type = SGML_TOKEN_ELEMENT_END;
skip_sgml(scanner, &string, '>', 1);
} else if (*string == '>') {
/* </>: end tag with an empty name. */
string++;
real_length = 0;
type = SGML_TOKEN_ELEMENT_END;
}
if (type != SGML_TOKEN_GARBAGE)
scanner->state = SGML_STATE_TEXT;
} else {
/* Alien < > stuff so ignore it */
skip_sgml(scanner, &string, '>', 0);
}
} else if (first_char == '=') {
/* The '=' character is its own token type. */
type = '=';
} else if (first_char == '?' || first_char == '>') {
if (first_char == '?') {
skip_sgml(scanner, &string, '>', 0);
}
type = SGML_TOKEN_TAG_END;
assert(scanner->state == SGML_STATE_ELEMENT);
scanner->state = SGML_STATE_TEXT;
} else if (first_char == '/') {
if (*string == '>') {
/* "/>" closes an empty element. */
string++;
real_length = 0;
type = SGML_TOKEN_ELEMENT_EMPTY_END;
assert(scanner->state == SGML_STATE_ELEMENT);
scanner->state = SGML_STATE_TEXT;
} else if (is_sgml_attribute(*string)) {
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
/* Leave a trailing "/>" for the next token. */
if (string[-1] == '/' && string[0] == '>')
string--;
}
} else if (isquote(first_char)) {
unsigned char *string_end = skip_sgml_chars(scanner, string, first_char);
if (string_end) {
/* We don't want the delimiters in the token */
token->string.string++;
real_length = string_end - token->string.string;
string = string_end + 1;
type = SGML_TOKEN_STRING;
} else if (is_sgml_attribute(*string)) {
/* Unterminated quote: treat what follows as an unquoted
* attribute value. */
token->string.string++;
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
}
} else if (is_sgml_attribute(first_char)) {
if (is_sgml_ident(first_char)) {
scan_sgml(scanner, string, SGML_CHAR_IDENT);
type = SGML_TOKEN_IDENT;
}
/* If non-identifier attribute characters follow, the whole
* span becomes an attribute token instead. */
if (is_sgml_attribute(*string)) {
scan_sgml_attribute(scanner, string);
type = SGML_TOKEN_ATTRIBUTE;
/* Leave a trailing "/>" for the next token. */
if (string[-1] == '/' && string[0] == '>')
string--;
}
}
token->type = type;
token->string.length = real_length >= 0 ? real_length : string - token->string.string;
token->precedence = get_sgml_precedence(type);
scanner->position = string;
}
/* Processing instruction data scanning */
/* Scan the data part of a processing instruction, starting at the scanner
 * position (just after the target name), and store it in @token as a
 * SGML_TOKEN_PROCESS_DATA token.
 *
 * The token string covers the data up to, but not including, the closing
 * '?>'. If the instruction is not terminated the token covers everything up
 * to the end of the scanner. The scanner is moved past the instruction and
 * put back in text state. */
static inline void
scan_sgml_proc_inst_token(struct dom_scanner *scanner, struct dom_scanner_token *token)
{
	unsigned char *start = scanner->position;
	unsigned char *string = start;
	unsigned char *data_end = NULL;

	token->string.string = start;

	/* Figure out where the processing instruction ends. This doesn't use
	 * skip_sgml() since we MUST ignore precedence here to allow '<' inside
	 * the data part to be skipped correctly. */
	for ( ; (string = skip_sgml_chars(scanner, string, '>')); string++) {
		/* Reading index -1 relies on this state only being entered
		 * after '<?' has been scanned. */
		if (string[-1] == '?') {
			/* The data ends before the '?'. For '<?>' that '?'
			 * precedes the data, so the data is empty. */
			data_end = string > start ? string - 1 : start;
			string++;
			break;
		}
	}

	if (!string) {
		/* Unterminated: there is no '?>' to cut off, so the data is
		 * everything that is left. */
		string = scanner->end;
		data_end = string;
	}

	token->type = SGML_TOKEN_PROCESS_DATA;
	token->string.length = data_end - start;
	token->precedence = get_sgml_precedence(token->type);
	scanner->position = string;
	scanner->state = SGML_STATE_TEXT;
}
/* Scanner multiplexor */
/* Fill the token table of @scanner with as many tokens as fit, dispatching to
* the element, text or processing instruction scanner depending on the
* scanner state and the next character. Returns what
* end_dom_token_scanning() returns, or the current token if
* begin_dom_token_scanning() reports that no scanning should be done. */
static struct dom_scanner_token *
scan_sgml_tokens(struct dom_scanner *scanner)
{
struct dom_scanner_token *table_end = scanner->table + DOM_SCANNER_TOKENS;
struct dom_scanner_token *current;
if (!begin_dom_token_scanning(scanner))
return get_dom_scanner_token(scanner);
/* Scan tokens until we fill the table */
for (current = scanner->table + scanner->tokens;
current < table_end && scanner->position < scanner->end;
current++) {
/* A '<' starts a tag from any state except inside a processing
* instruction, where it is part of the data. */
if (scanner->state == SGML_STATE_ELEMENT
|| (*scanner->position == '<'
&& scanner->state != SGML_STATE_PROC_INST)) {
skip_sgml_space(scanner, &scanner->position);
if (scanner->position >= scanner->end) break;
scan_sgml_element_token(scanner, current);
/* Shall we scratch this token? */
if (current->type == SGML_TOKEN_SKIP) {
/* Step back so the loop increment reuses this slot. */
current--;
}
} else if (scanner->state == SGML_STATE_TEXT) {
scan_sgml_text_token(scanner, current);
} else {
/* Processing instruction state. */
skip_sgml_space(scanner, &scanner->position);
scan_sgml_proc_inst_token(scanner, current);
}
}
return end_dom_token_scanning(scanner, current);
}

View File

@ -1,8 +1,8 @@
#ifndef EL__DOCUMENT_SGML_SCANNER_H
#define EL__DOCUMENT_SGML_SCANNER_H
#ifndef EL_DOM_SGML_SCANNER_H
#define EL_DOM_SGML_SCANNER_H
#include "util/scanner.h"
#include "dom/scanner.h"
enum sgml_token_type {
/* Char tokens: */
@ -25,8 +25,12 @@ enum sgml_token_type {
SGML_TOKEN_NOTATION_ENTITY, /* <!ENTITY until > */
SGML_TOKEN_NOTATION_ATTLIST, /* <!ATTLIST until > */
SGML_TOKEN_PROCESS, /* <?{ident} until ?> */
SGML_TOKEN_PROCESS_XML, /* <?xml until */
SGML_TOKEN_CDATA_SECTION, /* <![CDATA[ until ]]> */
SGML_TOKEN_PROCESS, /* <?{ident} */
SGML_TOKEN_PROCESS_XML, /* <?xml */
SGML_TOKEN_PROCESS_XML_STYLESHEET,/* <?xml-stylesheet */
SGML_TOKEN_PROCESS_DATA, /* data after <?{ident} until ?> */
SGML_TOKEN_ELEMENT, /* <{ident}> */
SGML_TOKEN_ELEMENT_BEGIN, /* <{ident} */
@ -54,7 +58,18 @@ enum sgml_token_type {
SGML_TOKEN_NONE = 0,
};
extern struct scanner_info sgml_scanner_info;
/* The SGML tokenizer keeps a state (in the scanner->state member) that is
 * either text, element, or processing instruction. The state is only
 * meaningful while the actual scanning takes place and should not be used at
 * parsing time. It can, however, be used to initialize the scanner to a
 * specific state. */
enum sgml_scanner_state {
SGML_STATE_TEXT, /* Input is scanned as text tokens */
SGML_STATE_ELEMENT, /* Input is scanned as element (markup) tokens */
SGML_STATE_PROC_INST, /* Input is scanned as processing instruction data */
};
extern struct dom_scanner_info sgml_scanner_info;
/* Treat '<' as more valuable than '>' so that scanning '<a<b>' while
 * skipping to the next '>' will stop at the second '<'. */
@ -63,6 +78,6 @@ extern struct scanner_info sgml_scanner_info;
(token_type) == '>' ? (1 << 10) : 0)
#define skip_sgml_tokens(scanner, type) \
skip_scanner_tokens(scanner, type, get_sgml_precedence(type))
skip_dom_scanner_tokens(scanner, type, get_sgml_precedence(type))
#endif

43
src/dom/sgml/sgml.c Normal file
View File

@ -0,0 +1,43 @@
/* SGML generics */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "dom/node.h"
#include "dom/sgml/sgml.h"
#include "dom/string.h"
#include "util/error.h"
/* Backend includes: */
#include "dom/sgml/html/html.h"
#include "dom/sgml/rss/rss.h"
#include "dom/sgml/xbel/xbel.h"
/* Comparison callback with the signature bsearch() expects: @key_ is the DOM
 * node being looked up and @node_ is a candidate node info entry. The two are
 * ordered by a case-insensitive comparison of their strings. */
int
sgml_info_strcmp(const void *key_, const void *node_)
{
	struct dom_node *wanted = (struct dom_node *) key_;
	struct sgml_node_info *entry = (struct sgml_node_info *) node_;

	return dom_string_casecmp(&wanted->string, &entry->string);
}
/* The known SGML document type descriptions, indexed by
 * enum sgml_document_type. The initializer is positional, so the entries
 * must be kept in the same order as the enumerators. */
struct sgml_info *sgml_info[SGML_DOCTYPES] = {
&sgml_html_info,
&sgml_rss_info,
&sgml_xbel_info,
};
/* Looks up the SGML info describing the given document type.
 * Returns NULL if @doctype is not one of the known document types. */
struct sgml_info *
get_sgml_info(enum sgml_document_type doctype)
{
	/* Compare as unsigned: whether an enum type is signed is up to the
	 * compiler, and a negative value must never be used as an index. */
	if ((unsigned int) doctype >= (unsigned int) SGML_DOCTYPES)
		return NULL;

	return sgml_info[doctype];
}

View File

@ -1,10 +1,11 @@
#ifndef EL__DOCUMENT_SGML_SGML_H
#define EL__DOCUMENT_SGML_SGML_H
#ifndef EL_DOM_SGML_SGML_H
#define EL_DOM_SGML_SGML_H
#include <stdlib.h>
#include "document/dom/stack.h"
#include "dom/stack.h"
#include "dom/string.h"
/* The flags stored in the attribute sgml node info data */
/* TODO: Other potential flags (there can be only 16)
@ -43,20 +44,21 @@ enum sgml_element_flags {
};
struct sgml_node_info {
unsigned char *string;
int length;
struct dom_string string;
uint16_t type;
uint16_t flags;
};
/* The header node is special. It is used for storing the number of nodes and
* for returning the default 'unknown' node. */
#define SGML_NODE_HEAD(doctype, nodetype) \
{ NULL, doctype##_##nodetype##S - 1, doctype##_##nodetype##_UNKNOWN }
{ INIT_DOM_STRING(NULL, doctype##_##nodetype##S - 1), doctype##_##nodetype##_UNKNOWN }
#define SGML_NODE_INFO(doctype, nodetype, name, data) \
{ #name, sizeof(#name) - 1, doctype##_##nodetype##_##name, data }
{ INIT_DOM_STRING(#name, sizeof(#name) - 1), doctype##_##nodetype##_##name, data }
#define SGML_NODE_INF2(doctype, nodetype, name, ident, data) \
{ ident, sizeof(ident) - 1, doctype##_##nodetype##_##name, data }
{ INIT_DOM_STRING(ident, sizeof(ident) - 1), doctype##_##nodetype##_##name, data }
#define SGML_NODE_INFO_TYPE(doctype, nodetype, name) doctype##_##nodetype##_##name
@ -66,16 +68,27 @@ static inline struct sgml_node_info *
get_sgml_node_info(struct sgml_node_info list[], struct dom_node *node)
{
struct sgml_node_info *map = &list[1];
size_t map_size = list->length;
size_t map_size = list->string.length;
size_t obj_size = sizeof(struct sgml_node_info);
void *match = bsearch(node, map, map_size, obj_size, sgml_info_strcmp);
return match ? match : list;
}
/* The SGML document types for which node info is available. */
enum sgml_document_type {
SGML_DOCTYPE_HTML,
SGML_DOCTYPE_RSS,
SGML_DOCTYPE_XBEL,
/* Not a document type: the number of document types listed above. */
SGML_DOCTYPES,
};
/* Describes one SGML document type. */
struct sgml_info {
/* The document type being described. */
enum sgml_document_type doctype;
/* Node info for the attributes and elements of the document type.
 * NOTE(review): presumably lists in the layout get_sgml_node_info()
 * expects (a header entry followed by the sorted entries) -- confirm. */
struct sgml_node_info *attributes;
struct sgml_node_info *elements;
};
/* Returns the SGML info for @doctype, or NULL for an unknown document type. */
struct sgml_info *get_sgml_info(enum sgml_document_type doctype);
#endif

View File

@ -0,0 +1,6 @@
# Root of the build tree, relative to this directory. It has to be set before
# Makefile.config is included since that file is found through it.
top_builddir=../../../..
# Build configuration; this is also what defines top_srcdir used below.
include $(top_builddir)/Makefile.config
# Object files belonging to this directory.
OBJS = xbel.o
# Shared rules. NOTE(review): presumably this is what builds $(OBJS) --
# confirm in Makefile.lib.
include $(top_srcdir)/Makefile.lib

View File

@ -0,0 +1,14 @@
/* XBEL attributes */
/* http://pyxml.sourceforge.net/topics/xbel/docs/html/public-text.html */
/* Each XBEL_() entry names one attribute; the last argument appears to be the
 * attribute's SGML flags (0 for none). The entries are in alphabetical order.
 * NOTE(review): presumably that order has to be kept, since node info lists
 * are searched with bsearch() -- confirm where this list gets included. */
/* NOTE(review): VERSION is presumably also defined as a macro elsewhere (e.g.
 * by the build configuration), which would clash with the VERSION entry
 * below -- confirm. */
#undef VERSION
XBEL_(ATTRIBUTE, ADDED, 0),
XBEL_(ATTRIBUTE, FOLDED, 0),
XBEL_(ATTRIBUTE, HREF, SGML_ATTRIBUTE_REFERENCE),
XBEL_(ATTRIBUTE, ID, SGML_ATTRIBUTE_IDENTIFIER),
XBEL_(ATTRIBUTE, MODIFIED, 0),
XBEL_(ATTRIBUTE, OWNER, 0),
XBEL_(ATTRIBUTE, REF, 0),
XBEL_(ATTRIBUTE, VERSION, 0),
XBEL_(ATTRIBUTE, VISITED, 0),

Some files were not shown because too many files have changed in this diff Show More