1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-11-04 08:17:17 -05:00
elinks/src/document/renderer.c
Witold Filipczyk d09bdf7c28 Fix problem with double decompression of local files
ELinks tries decompress local gziped files twice, which causes problems
with some files eg. with this README.gz. This patch prevents it.
2005-12-18 17:03:34 +01:00

601 lines
17 KiB
C

/* HTML renderer */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ctype.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "elinks.h"
#include "cache/cache.h"
#include "config/options.h"
#include "document/document.h"
#include "document/dom/renderer.h"
#include "document/html/frames.h"
#include "document/html/renderer.h"
#include "document/plain/renderer.h"
#include "document/renderer.h"
#include "document/view.h"
#include "ecmascript/ecmascript.h"
#include "encoding/encoding.h"
#include "intl/charsets.h"
#include "main/main.h"
#include "main/object.h"
#include "protocol/header.h"
#include "protocol/protocol.h"
#include "protocol/uri.h"
#include "session/location.h"
#include "session/session.h"
#include "terminal/terminal.h"
#include "terminal/window.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
#include "viewer/text/view.h"
#include "viewer/text/vs.h"
static void sort_links(struct document *document);
#ifdef CONFIG_ECMASCRIPT
/* XXX: This function is de facto obsolete, since we do not need to copy
* snippets around anymore (we process them in one go after the document is
* loaded; gradual processing was practically impossible because the snippets
* could reorder randomly during the loading - consider i.e.
* <body onLoad><script></body>: first just <body> is loaded, but then the
* rest of the document is loaded and <script> gets before <body>; do not even
* imagine the trouble with rewritten (through scripting hooks) documents;
* besides, implementing document.write() will be much simpler).
* But I want to take no risk by reworking that now. --pasky */
static void
add_snippets(struct ecmascript_interpreter *interpreter,
struct list_head *doc_snippets, struct list_head *queued_snippets)
{
struct string_list_item *doc_current = doc_snippets->next;
#ifdef CONFIG_LEDS
if (list_empty(*queued_snippets) && interpreter->vs->doc_view->session)
unset_led_value(interpreter->vs->doc_view->session->status.ecmascript_led);
#endif
if (list_empty(*doc_snippets) || !get_opt_bool("ecmascript.enable"))
return;
/* We do this all only once per view_state now. */
if (!list_empty(*queued_snippets)) {
/* So if we already did it, we shouldn't need to do it again.
* This is the case of moving around in history - we have all
* what happenned recorded in the view_state and needn't bother
* again. */
#ifdef CONFIG_DEBUG
/* Hopefully. */
struct string_list_item *iterator = queued_snippets->next;
while (iterator != (struct string_list_item *) queued_snippets) {
if (doc_current == (struct string_list_item *) doc_snippets) {
INTERNAL("add_snippets(): doc_snippets shorter than queued_snippets!");
return;
}
#if 0
DBG("Comparing snippets\n%.*s\n###### vs #####\n%.*s\n #####",
iterator->string.length, iterator->string.source,
doc_current->string.length, doc_current->string.source);
#endif
assert(!strlcmp(iterator->string.source,
iterator->string.length,
doc_current->string.source,
doc_current->string.length));
doc_current = doc_current->next;
iterator = iterator->next;
}
#endif
return;
}
assert(doc_current);
for (; doc_current != (struct string_list_item *) doc_snippets;
doc_current = doc_current->next) {
add_to_string_list(queued_snippets, doc_current->string.source,
doc_current->string.length);
#if 0
DBG("Adding snippet\n%.*s\n #####",
doc_current->string.length,
doc_current->string.source);
#endif
}
}
static void
process_snippets(struct ecmascript_interpreter *interpreter,
struct list_head *snippets, struct string_list_item **current)
{
if (!*current)
*current = snippets->next;
for (; *current != (struct string_list_item *) snippets;
(*current) = (*current)->next) {
struct string *string = &(*current)->string;
unsigned char *uristring;
struct uri *uri;
struct cache_entry *cached;
struct fragment *fragment;
if (string->length == 0)
continue;
if (*string->source != '^') {
/* Evaluate <script>code</script> snippet */
ecmascript_eval(interpreter, string);
continue;
}
/* Eval external <script src="reference"></script> snippet */
uristring = string->source + 1;
if (!*uristring) continue;
uri = get_uri(uristring, URI_BASE);
if (!uri) continue;
cached = get_redirected_cache_entry(uri);
done_uri(uri);
if (!cached) {
/* At this time (!gradual_rerendering), we should've
* already retrieved this though. So it must've been
* that it went away because unused and the cache was
* already too full. */
#if 0
/* Disabled because gradual rerendering can be triggered
* by numerous events other than a ecmascript reference
* completing like the original document and CSS. Problem
* is that we should never continue this loop but rather
* break out if that is the case. Somehow we need to
* be able to derive URI loading problems at this point
* or maybe remove reference snippets if they fail to load.
*
* This FIFO queue handling should be used for also CSS
* imports so it would be cool if it could be general
* enough for that. Using it for frames with the FIFOing
* disabled probably wouldn't hurt either.
*
* To top this thing off it would be nice if it also
* handled dependency tracking between references so that
* CSS documents will not disappear from the cache
* before all referencing HTML documents has been deleted
* from it.
*
* Reported as bug 533. */
/* Pasky's explanation: If we get the doc in a single
* shot, before calling draw_formatted() we didn't have
* anything additional queued for loading and the cache
* entry was already loaded, so we didn't get
* gradual_loading set. But then while parsing the
* document we got some external references and trying
* to process them right now. Boom.
*
* The obvious solution would be to always call
* draw_formatted() with gradual_loading in
* doc_loading_callback() and if we are sure the
* loading is really over, call it one more time
* without gradual_loading set. I'm not sure about
* the implications though so I won't do it before
* 0.10.0. --pasky */
ERROR("The script of %s was lost in too full a cache!",
uristring);
#endif
continue;
}
fragment = get_cache_fragment(cached);
if (fragment) {
struct string code = INIT_STRING(fragment->data, fragment->length);
ecmascript_eval(interpreter, &code);
}
}
}
#endif
static void
render_encoded_document(struct cache_entry *cached, struct document *document)
{
struct uri *uri = cached->uri;
enum stream_encoding encoding = ENCODING_NONE;
struct fragment *fragment = get_cache_fragment(cached);
struct string buffer = INIT_STRING("", 0);
/* Even empty documents have to be rendered so that info in the protocol
* header, such as refresh info, get processed. (bug 625) */
if (fragment) {
buffer.source = fragment->data;
buffer.length = fragment->length;
}
if (uri->protocol != PROTOCOL_FILE) {
unsigned char *extension = get_extension_from_uri(uri);
if (extension) {
encoding = guess_encoding(extension);
mem_free(extension);
}
if (encoding != ENCODING_NONE) {
int length = 0;
unsigned char *source;
source = decode_encoded_buffer(encoding, buffer.source,
buffer.length, &length);
if (source) {
buffer.source = source;
buffer.length = length;
} else {
encoding = ENCODING_NONE;
}
}
}
if (document->options.plain) {
#ifdef CONFIG_DOM
if (cached->content_type
&& !strlcasecmp("text/html", 9, cached->content_type, -1))
render_dom_document(cached, document, &buffer);
else
#endif
render_plain_document(cached, document, &buffer);
} else {
render_html_document(cached, document, &buffer);
}
if (encoding != ENCODING_NONE) {
done_string(&buffer);
}
}
void
render_document(struct view_state *vs, struct document_view *doc_view,
struct document_options *options)
{
unsigned char *name;
struct document *document;
struct cache_entry *cached;
assert(vs && doc_view && options);
if_assert_failed return;
#if 0
DBG("(Re%u)Rendering %s on doc_view %p [%s] while attaching it to %p",
options->gradual_rerendering, struri(vs->uri),
doc_view, doc_view->name, vs);
#endif
name = doc_view->name;
doc_view->name = NULL;
detach_formatted(doc_view);
doc_view->name = name;
doc_view->vs = vs;
doc_view->last_x = doc_view->last_y = -1;
#if 0
/* This is a nice idea, but doesn't always work: in particular when
* there's a frame name conflict. You loaded something to the vs'
* frame, but later something tried to get loaded to a frame with
* the same name and we got back this frame again, so we are now
* overriding the original document with a cuckoo. This assert()ion
* should be re-enabled when we start to get this right (which is
* very complex, but someone should rewrite the frames support
* anyway). --pasky */
assert(!vs->doc_view);
#else
if (vs->doc_view) {
/* It will be still detached, no worries - hopefully it still
* resides in ses->scrn_frames. */
assert(vs->doc_view->vs == vs);
vs->doc_view->used = 0; /* A bit risky, but... */
vs->doc_view->vs = NULL;
vs->doc_view = NULL;
#ifdef CONFIG_ECMASCRIPT
vs->ecmascript_fragile = 1; /* And is this good? ;-) */
#endif
}
#endif
vs->doc_view = doc_view;
cached = find_in_cache(vs->uri);
if (!cached) {
INTERNAL("document %s to format not found", struri(vs->uri));
return;
}
document = get_cached_document(cached, options);
if (document) {
doc_view->document = document;
} else {
document = init_document(cached, options);
if (!document) return;
doc_view->document = document;
shrink_memory(0);
render_encoded_document(cached, document);
sort_links(document);
if (!document->title) {
enum uri_component components;
if (document->uri->protocol == PROTOCOL_FILE) {
components = URI_PATH;
} else {
components = URI_PUBLIC;
}
document->title = get_uri_string(document->uri, components);
if (document->title)
decode_uri_for_display(document->title);
}
#ifdef CONFIG_CSS
document->css_magic = get_document_css_magic(document);
#endif
}
#ifdef CONFIG_ECMASCRIPT
if (!vs->ecmascript_fragile)
assert(vs->ecmascript);
if (!options->gradual_rerendering) {
/* We also reset the state if the underlying document changed
* from the last time we did the snippets. This may be
* triggered i.e. when redrawing a document which has been
* reloaded in a different tab meanwhile (OTOH we don't want
* to reset the state if we are redrawing a document we have
* already drawn before).
*
* (vs->ecmascript->onload_snippets_owner) check may be
* superfluous since we should always have
* vs->ecmascript_fragile set in those cases; that's why we
* don't ever bother to re-zero it if we are suddenly doing
* gradual rendering again.
*
* XXX: What happens if a document is still loading in the
* other tab when we press ^L here? */
if (vs->ecmascript_fragile
|| (vs->ecmascript && vs->ecmascript->onload_snippets_owner
&& document->id != vs->ecmascript->onload_snippets_owner))
ecmascript_reset_state(vs);
assert(vs->ecmascript);
vs->ecmascript->onload_snippets_owner = document->id;
/* Passing of the onload_snippets pointers gives *_snippets()
* some feeling of universality, shall we ever get any other
* snippets (?). */
add_snippets(vs->ecmascript,
&document->onload_snippets,
&vs->ecmascript->onload_snippets);
process_snippets(vs->ecmascript, &vs->ecmascript->onload_snippets,
&vs->ecmascript->current_onload_snippet);
}
#endif
/* If we do not care about the height and width of the document
* just use the setup values. */
copy_box(&doc_view->box, &document->options.box);
if (!document->options.needs_width)
doc_view->box.width = options->box.width;
if (!document->options.needs_height)
doc_view->box.height = options->box.height;
}
void
render_document_frames(struct session *ses, int no_cache)
{
struct document_options doc_opts;
struct document_view *doc_view;
struct document_view *current_doc_view = NULL;
struct view_state *vs = NULL;
if (!ses->doc_view) {
ses->doc_view = mem_calloc(1, sizeof(*ses->doc_view));
if (!ses->doc_view) return;
ses->doc_view->session = ses;
ses->doc_view->search_word = &ses->search_word;
}
if (have_location(ses)) vs = &cur_loc(ses)->vs;
init_document_options(&doc_opts);
set_box(&doc_opts.box, 0, 0,
ses->tab->term->width, ses->tab->term->height);
if (ses->status.show_title_bar) {
doc_opts.box.y++;
doc_opts.box.height--;
}
if (ses->status.show_status_bar) doc_opts.box.height--;
if (ses->status.show_tabs_bar) doc_opts.box.height--;
doc_opts.color_mode = get_opt_int_tree(ses->tab->term->spec, "colors");
if (!get_opt_bool_tree(ses->tab->term->spec, "underline"))
doc_opts.color_flags |= COLOR_ENHANCE_UNDERLINE;
doc_opts.cp = get_opt_codepage_tree(ses->tab->term->spec, "charset");
doc_opts.no_cache = no_cache & 1;
doc_opts.gradual_rerendering = !!(no_cache & 2);
if (vs) {
if (vs->plain < 0) vs->plain = 0;
doc_opts.plain = vs->plain;
doc_opts.wrap = vs->wrap;
} else {
doc_opts.plain = 1;
}
foreach (doc_view, ses->scrn_frames) doc_view->used = 0;
if (vs) render_document(vs, ses->doc_view, &doc_opts);
if (document_has_frames(ses->doc_view->document)) {
current_doc_view = current_frame(ses);
format_frames(ses, ses->doc_view->document->frame_desc, &doc_opts, 0);
}
foreach (doc_view, ses->scrn_frames) {
struct document_view *prev_doc_view = doc_view->prev;
if (doc_view->used) continue;
detach_formatted(doc_view);
del_from_list(doc_view);
mem_free(doc_view);
doc_view = prev_doc_view;
}
if (current_doc_view) {
int n = 0;
foreach (doc_view, ses->scrn_frames) {
if (document_has_frames(doc_view->document)) continue;
if (doc_view == current_doc_view) {
cur_loc(ses)->vs.current_link = n;
break;
}
n++;
}
}
}
static int
comp_links(struct link *l1, struct link *l2)
{
assert(l1 && l2);
if_assert_failed return 0;
return (l1->number - l2->number);
}
static void
sort_links(struct document *document)
{
int i;
assert(document);
if_assert_failed return;
if (!document->nlinks) return;
assert(document->links);
if_assert_failed return;
qsort(document->links, document->nlinks, sizeof(*document->links),
(void *) comp_links);
if (!document->height) return;
document->lines1 = mem_calloc(document->height, sizeof(*document->lines1));
if (!document->lines1) return;
document->lines2 = mem_calloc(document->height, sizeof(*document->lines2));
if (!document->lines2) {
mem_free(document->lines1);
return;
}
for (i = 0; i < document->nlinks; i++) {
struct link *link = &document->links[i];
int p, q, j;
if (!link->npoints) {
done_link_members(link);
memmove(link, link + 1,
(document->nlinks - i - 1) * sizeof(*link));
document->nlinks--;
i--;
continue;
}
p = link->points[0].y;
q = link->points[link->npoints - 1].y;
if (p > q) j = p, p = q, q = j;
for (j = p; j <= q; j++) {
assertm(j < document->height, "link out of screen");
if_assert_failed continue;
document->lines2[j] = &document->links[i];
if (!document->lines1[j])
document->lines1[j] = &document->links[i];
}
}
}
struct conv_table *
get_convert_table(unsigned char *head, int to_cp,
int default_cp, int *from_cp,
enum cp_status *cp_status, int ignore_server_cp)
{
unsigned char *part = head;
int cp_index = -1;
assert(head);
if_assert_failed return NULL;
if (ignore_server_cp) {
if (cp_status) *cp_status = CP_STATUS_IGNORED;
if (from_cp) *from_cp = default_cp;
return get_translation_table(default_cp, to_cp);
}
while (cp_index == -1) {
unsigned char *ct_charset;
unsigned char *meta;
unsigned char *a = parse_header(part, "Content-Type", &part);
if (!a) break;
/* Content type info from document meta header.
* scan_http_equiv() appends the meta stuff to the protocol header before
* this function is called. Last Content-Type header field is used. */
while ((meta = parse_header(part, "Content-Type", &part))) {
mem_free_set(&a, meta);
}
ct_charset = parse_header_param(a, "charset");
if (ct_charset) {
cp_index = get_cp_index(ct_charset);
mem_free(ct_charset);
}
mem_free(a);
}
if (cp_index == -1) {
unsigned char *a = parse_header(head, "Content-Charset", NULL);
if (a) {
cp_index = get_cp_index(a);
mem_free(a);
}
}
if (cp_index == -1) {
unsigned char *a = parse_header(head, "Charset", NULL);
if (a) {
cp_index = get_cp_index(a);
mem_free(a);
}
}
if (cp_index == -1) {
cp_index = default_cp;
if (cp_status) *cp_status = CP_STATUS_ASSUMED;
} else {
if (cp_status) *cp_status = CP_STATUS_SERVER;
}
if (from_cp) *from_cp = cp_index;
return get_translation_table(cp_index, to_cp);
}