2005-09-15 09:58:31 -04:00
|
|
|
/* Functionality for handling mime types */
|
|
|
|
|
2006-11-06 09:27:54 -05:00
|
|
|
#ifndef _GNU_SOURCE
|
|
|
|
#define _GNU_SOURCE /* XXX: we _WANT_ strcasestr() ! */
|
|
|
|
#endif
|
|
|
|
|
2005-09-15 09:58:31 -04:00
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "elinks.h"
|
|
|
|
|
|
|
|
#include "cache/cache.h"
|
|
|
|
#include "config/options.h"
|
|
|
|
#include "encoding/encoding.h"
|
2021-08-08 15:25:08 -04:00
|
|
|
#include "intl/libintl.h"
|
2005-09-15 09:58:31 -04:00
|
|
|
#include "main/module.h"
|
|
|
|
#include "mime/backend/common.h"
|
|
|
|
#include "mime/mime.h"
|
|
|
|
#include "protocol/header.h" /* For parse_header() */
|
|
|
|
#include "protocol/uri.h"
|
|
|
|
#include "util/conv.h"
|
|
|
|
#include "util/file.h"
|
|
|
|
#include "util/memory.h"
|
|
|
|
#include "util/string.h"
|
|
|
|
|
|
|
|
|
|
|
|
/* Indices into the mime_options[] option table of this file. The
 * enumerators are used as array subscripts by get_opt_mime(), so their
 * order has to match the order of the table entries. */
enum mime_options {
	/* The "mime" option tree itself. */
	MIME_TREE,
	/* The "default_type" string option inside that tree. */
	MIME_DEFAULT_TYPE,

	/* Number of entries above; not an option itself. */
	MIME_OPTIONS,
};
|
|
|
|
|
bug 764: Initialize the right member of union option_value
INIT_OPTION used to initialize union option_value at compile time by
casting the default value to LIST_OF(struct option) *, which is the
type of the first member. On sparc64 and other big-endian systems
where sizeof(int) < sizeof(struct list_head *), this tended to leave
option->value.number as zero, thus messing up OPT_INT and OPT_BOOL
at least. OPT_LONG however tended to work right.
This would be easy to fix with C99 designated initializers,
but doc/hacking.txt says ELinks must be kept C89 compatible.
Another solution would be to make register_options() read the
value from option->value.tree (the first member), cast it back
to the right type, and write it to the appropriate member;
but that would still require somewhat dubious conversions
between integers, data pointers, and function pointers.
So here's a rather more invasive solution. Add struct option_init,
which is somewhat similar to struct option but has non-overlapping
members for different types of values, to ensure nothing is lost
in compile-time conversions. Move unsigned char *path from struct
option_info to struct option_init, and replace struct option_info
with a union that contains struct option_init and struct option.
Now, this union can be initialized with no portability problems,
and register_options() then moves the values from struct option_init
to their final places in struct option.
In my x86 ELinks build with plenty of options configured in, this
change bloated the text section by 340 bytes but compressed the data
section by 2784 bytes, presumably because union option_info is a
pointer smaller than struct option_info was.
(cherry picked from elinks-0.12 commit e5f6592ee20780a61f70feeb1f9e17631b9c5835)
Conflicts:
src/protocol/fsp/fsp.c: All options had been removed in 0.13.GIT.
src/protocol/smb/smb2.c: Ditto.
2009-08-15 15:39:07 -04:00
|
|
|
static union option_info mime_options[] = {
|
2022-03-02 12:30:25 -05:00
|
|
|
INIT_OPT_TREE("", N_("MIME"),
|
|
|
|
"mime", OPT_SORT,
|
2005-09-15 09:58:31 -04:00
|
|
|
N_("MIME-related options (handlers of various MIME types).")),
|
|
|
|
|
2022-03-02 12:30:25 -05:00
|
|
|
INIT_OPT_STRING("mime", N_("Default MIME-type"),
|
|
|
|
"default_type", OPT_ZERO, DEFAULT_MIME_TYPE,
|
Rewrap lines in option documentation.
Documentation strings of most options used to contain a "\n" at the
end of each source line. When the option manager displayed these
strings, it treated each "\n" as a hard newline. On 80x24 terminals
however, the option description window has only 60 columes available
for the text (with the default setup.h), and the hard newlines were
further apart, so the option manager wrapped the text a second time,
resulting in rather ugly output where long lones are interleaved with
short ones. This could also cause the text to take up too much
vertical space and not fit in the window.
Replace most of those hard newlines with spaces so that the option
manager (or perhaps BFU) will take care of the wrapping. At the same
time, rewrap the strings in source code so that the source lines are
at most 79 columns wide.
In some options though, there is a list of possible values and their
meanings. In those lists, if the description of one value does not
fit in one line, then continuation lines should be indented. The
option manager and BFU are not currently able to do that. So, keep
the hard newlines in those lists, but rewrap them to 60 columns so
that they are less likely to require further wrapping at runtime.
2009-03-07 13:48:38 -05:00
|
|
|
N_("Document MIME-type to assume by default "
|
|
|
|
"(when we are unable to guess it properly "
|
|
|
|
"from known information about the document).")),
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
NULL_OPTION_INFO,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Expands to the struct option stored at index @which of mime_options[];
 * @which is expected to be one of the enum mime_options values. */
#define get_opt_mime(which) \
	(mime_options[(which)].option)

/* Expands to the string value of the "default_type" option, i.e. the MIME
 * type to fall back to when nothing better could be guessed. */
#define get_default_mime_type() \
	(get_opt_mime(MIME_DEFAULT_TYPE).value.string)
|
|
|
|
|
|
|
|
/* Checks protocols headers for a suitable filename */
|
2021-01-02 10:20:27 -05:00
|
|
|
static char *
|
2005-09-15 09:58:31 -04:00
|
|
|
get_content_filename(struct uri *uri, struct cache_entry *cached)
|
|
|
|
{
|
2021-01-02 10:20:27 -05:00
|
|
|
char *filename, *pos;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (!cached) cached = find_in_cache(uri);
|
|
|
|
|
|
|
|
if (!cached || !cached->head)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
pos = parse_header(cached->head, "Content-Disposition", NULL);
|
|
|
|
if (!pos) return NULL;
|
|
|
|
|
2016-08-21 16:02:46 -04:00
|
|
|
parse_header_param(pos, "filename", &filename, 1);
|
2005-09-15 09:58:31 -04:00
|
|
|
mem_free(pos);
|
|
|
|
if (!filename) return NULL;
|
|
|
|
|
|
|
|
/* Remove start and ending quotes. */
|
|
|
|
if (filename[0] == '"') {
|
|
|
|
int len = strlen(filename);
|
|
|
|
|
|
|
|
if (len > 1 && filename[len - 1] == '"') {
|
|
|
|
filename[len - 1] = 0;
|
|
|
|
memmove(filename, filename + 1, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It was an empty quotation: "" */
|
|
|
|
if (!filename[1]) {
|
|
|
|
mem_free(filename);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We don't want to add any directories from the path so make sure we
|
|
|
|
* only add the filename. */
|
|
|
|
pos = get_filename_position(filename);
|
|
|
|
if (!*pos) {
|
|
|
|
mem_free(filename);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pos > filename)
|
|
|
|
memmove(filename, pos, strlen(pos) + 1);
|
|
|
|
|
|
|
|
return filename;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Checks if application/x-<extension> has any handlers.
 *
 * Only the text after the last '.' in @extension is used to build the
 * type name. Returns the newly allocated "application/x-..." string when
 * get_mime_type_handler() finds a handler for it, NULL otherwise (also
 * when @extension contains no '.' or the allocation fails). */
static inline char *
check_extension_type(char *extension)
{
	struct mime_handler *handler;
	char *content_type;
	/* Trim the extension so only the last .<extension> is used. */
	char *dot = strrchr(extension, '.');

	if (dot == NULL)
		return NULL;

	content_type = straconcat("application/x-", dot + 1, (char *) NULL);
	if (content_type == NULL)
		return NULL;

	handler = get_mime_type_handler(content_type, 1);
	if (handler == NULL) {
		mem_free(content_type);
		return NULL;
	}

	/* Only the existence of a handler matters here, not the handler
	 * itself. */
	mem_free(handler);

	return content_type;
}
|
|
|
|
|
|
|
|
/* Check if part of the extension coresponds to a supported encoding and if it
|
|
|
|
* has any handlers. */
|
2021-01-02 10:20:27 -05:00
|
|
|
static inline char *
|
|
|
|
check_encoding_type(char *extension)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
2022-01-28 10:17:25 -05:00
|
|
|
stream_encoding_T encoding = guess_encoding(extension);
|
2021-01-02 10:20:27 -05:00
|
|
|
const char *const *extension_list;
|
2022-01-18 14:42:29 -05:00
|
|
|
char *last_extension = strrchr(extension, '.');
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (encoding == ENCODING_NONE || !last_extension)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (extension_list = listext_encoded(encoding);
|
|
|
|
extension_list && *extension_list;
|
|
|
|
extension_list++) {
|
2021-01-02 10:20:27 -05:00
|
|
|
char *content_type;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (strcmp(*extension_list, last_extension))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
*last_extension = '\0';
|
|
|
|
content_type = get_content_type_backends(extension);
|
|
|
|
*last_extension = '.';
|
|
|
|
|
|
|
|
return content_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tracing of the content type detection. Turn the "#if 0" into "#if 1" to
 * define DEBUG_CONTENT_TYPE and get the DBG() output of the helpers below. */
#if 0
#define DEBUG_CONTENT_TYPE
#endif

#ifndef DEBUG_CONTENT_TYPE

/* Tracing disabled: the helpers expand to nothing. */
#define debug_get_content_type_params(cached)
#define debug_ctype(ctype__)
#define debug_extension(extension__)

#else /* DEBUG_CONTENT_TYPE */

/* Dump the header and the URI of the cache entry being examined. */
#define debug_get_content_type_params(cached) \
	DBG("get_content_type(head, url)\n=== head ===\n%s\n=== url ===\n%s\n", (cached)->head, struri((cached)->uri))
/* Print a content type candidate. */
#define debug_ctype(ctype__) DBG("ctype= %s", (ctype__))
/* Print the extension or filename the guess is based on. */
#define debug_extension(extension__) DBG("extension= %s", (extension__))

#endif /* DEBUG_CONTENT_TYPE */
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
/* Guesses a content type from @extension, which must be a non-empty
 * string.
 *
 * Three sources are tried in order and the first result wins:
 *  1. the content type backends, with the extension as given,
 *  2. check_encoding_type(), which retries without an encoding suffix,
 *  3. check_extension_type(), which probes application/x-<extension>.
 *
 * Returns the content type string produced by the winning step or NULL. */
char *
get_extension_content_type(char *extension)
{
	char *ctype;

	assert(extension && *extension);

	ctype = get_content_type_backends(extension);
	debug_ctype(ctype);

	if (!ctype) {
		ctype = check_encoding_type(extension);
		debug_ctype(ctype);
	}

	if (!ctype) {
		ctype = check_extension_type(extension);
		debug_ctype(ctype);
	}

	return ctype;
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
char *
|
2005-09-15 09:58:31 -04:00
|
|
|
get_cache_header_content_type(struct cache_entry *cached)
|
|
|
|
{
|
2021-01-02 10:20:27 -05:00
|
|
|
char *extension, *ctype;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
ctype = parse_header(cached->head, "Content-Type", NULL);
|
|
|
|
if (ctype) {
|
2022-01-18 14:30:48 -05:00
|
|
|
char *end = strchr(ctype, ';');
|
2005-09-15 09:58:31 -04:00
|
|
|
int ctypelen;
|
|
|
|
|
|
|
|
if (end) *end = '\0';
|
|
|
|
|
|
|
|
ctypelen = strlen(ctype);
|
|
|
|
while (ctypelen && ctype[--ctypelen] <= ' ')
|
|
|
|
ctype[ctypelen] = '\0';
|
|
|
|
|
|
|
|
debug_ctype(ctype);
|
|
|
|
|
|
|
|
if (*ctype) {
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
|
|
|
|
mem_free(ctype);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This searches cached->head for filename so put here */
|
|
|
|
extension = get_content_filename(cached->uri, cached);
|
|
|
|
debug_extension(extension);
|
|
|
|
if (extension) {
|
|
|
|
ctype = get_extension_content_type(extension);
|
|
|
|
mem_free(extension);
|
|
|
|
if (ctype) {
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
static char *
|
2006-11-05 23:15:50 -05:00
|
|
|
get_fragment_content_type(struct cache_entry *cached)
|
|
|
|
{
|
|
|
|
struct fragment *fragment;
|
|
|
|
size_t length;
|
2021-01-02 10:20:27 -05:00
|
|
|
char *sample;
|
|
|
|
char *ctype = NULL;
|
2006-11-05 23:15:50 -05:00
|
|
|
|
|
|
|
if (list_empty(cached->frag))
|
|
|
|
return NULL;
|
|
|
|
|
2022-01-25 12:25:58 -05:00
|
|
|
fragment = (struct fragment *)cached->frag.next;
|
2006-11-05 23:15:50 -05:00
|
|
|
if (fragment->offset)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
length = fragment->length > 1024 ? 1024 : fragment->length;
|
|
|
|
sample = memacpy(fragment->data, length);
|
|
|
|
if (!sample)
|
|
|
|
return NULL;
|
|
|
|
|
2016-04-20 14:11:08 -04:00
|
|
|
if (c_strcasestr((const char *)sample, "<html>"))
|
2006-11-05 23:15:50 -05:00
|
|
|
ctype = stracpy("text/html");
|
|
|
|
|
|
|
|
mem_free(sample);
|
|
|
|
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
char *
|
2005-09-15 09:58:31 -04:00
|
|
|
get_content_type(struct cache_entry *cached)
|
|
|
|
{
|
2021-01-02 10:20:27 -05:00
|
|
|
char *extension, *ctype;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
debug_get_content_type_params(cached);
|
|
|
|
|
|
|
|
if (cached->content_type)
|
|
|
|
return cached->content_type;
|
|
|
|
|
|
|
|
/* If there's one in header, it's simple.. */
|
|
|
|
if (cached->head) {
|
|
|
|
ctype = get_cache_header_content_type(cached);
|
|
|
|
if (ctype && *ctype) {
|
|
|
|
cached->content_type = ctype;
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
mem_free_if(ctype);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We can't use the extension string we are getting below, because we
|
|
|
|
* want to support also things like "ps.gz" - that'd never work, as we
|
|
|
|
* would always compare only to "gz". */
|
|
|
|
/* Guess type accordingly to the extension */
|
|
|
|
extension = get_extension_from_uri(cached->uri);
|
|
|
|
debug_extension(extension);
|
|
|
|
|
|
|
|
if (extension) {
|
|
|
|
/* XXX: A little hack for making extension handling case
|
|
|
|
* insensitive. We could probably do it better by making
|
|
|
|
* guess_encoding() case independent the real problem however
|
|
|
|
* is with default (via option system) and mimetypes resolving
|
|
|
|
* doing that option and hash lookup will not be easy to
|
|
|
|
* convert. --jonas */
|
2008-11-01 13:18:06 -04:00
|
|
|
convert_to_lowercase_locale_indep(extension, strlen(extension));
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
ctype = get_extension_content_type(extension);
|
|
|
|
mem_free(extension);
|
|
|
|
if (ctype && *ctype) {
|
|
|
|
cached->content_type = ctype;
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
mem_free_if(ctype);
|
|
|
|
}
|
|
|
|
|
2006-11-05 23:15:50 -05:00
|
|
|
ctype = get_fragment_content_type(cached);
|
|
|
|
if (ctype && *ctype) {
|
|
|
|
cached->content_type = ctype;
|
|
|
|
return ctype;
|
|
|
|
}
|
|
|
|
|
2005-09-15 09:58:31 -04:00
|
|
|
debug_ctype(get_default_mime_type());
|
|
|
|
|
2010-07-17 23:11:13 -04:00
|
|
|
/* text/plain for pager mode */
|
|
|
|
if (cached->uri && cached->uri->string
|
|
|
|
&& !strcmp(cached->uri->string, "file:///dev/stdin")) {
|
|
|
|
cached->content_type = stracpy("text/plain");
|
|
|
|
} else
|
|
|
|
/* Fallback.. use some hardwired default */
|
|
|
|
cached->content_type = stracpy(get_default_mime_type());
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
return cached->content_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Looks up a handler for @content_type by forwarding both arguments
 * unchanged to get_mime_handler_backends() and returns its result.
 * check_extension_type() in this file releases a returned handler with
 * mem_free(). */
struct mime_handler *
get_mime_type_handler(char *content_type, int xwin)
{
	struct mime_handler *handler;

	handler = get_mime_handler_backends(content_type, xwin);

	return handler;
}
|
|
|
|
|
2019-04-21 06:27:40 -04:00
|
|
|
struct string *
|
|
|
|
add_mime_filename_to_string(struct string *string, struct uri *uri)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
2021-01-02 10:20:27 -05:00
|
|
|
char *filename = get_content_filename(uri, NULL);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
assert(uri->data);
|
|
|
|
|
|
|
|
if (filename) {
|
|
|
|
add_shell_safe_to_string(string, filename, strlen(filename));
|
|
|
|
mem_free(filename);
|
|
|
|
|
|
|
|
return string;
|
|
|
|
}
|
|
|
|
|
|
|
|
return add_uri_to_string(string, uri, URI_FILENAME);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Backends dynamic area: */
|
|
|
|
|
|
|
|
#include "mime/backend/default.h"
|
2022-05-21 12:17:04 -04:00
|
|
|
#include "mime/backend/dgi.h"
|
2005-09-15 09:58:31 -04:00
|
|
|
#include "mime/backend/mailcap.h"
|
|
|
|
#include "mime/backend/mimetypes.h"
|
|
|
|
|
|
|
|
static struct module *mime_submodules[] = {
|
|
|
|
&default_mime_module,
|
2022-05-21 12:17:04 -04:00
|
|
|
#ifdef CONFIG_DGI
|
|
|
|
&dgi_mime_module,
|
|
|
|
#endif
|
2005-09-15 09:58:31 -04:00
|
|
|
#ifdef CONFIG_MAILCAP
|
|
|
|
&mailcap_mime_module,
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_MIMETYPES
|
|
|
|
&mimetypes_mime_module,
|
|
|
|
#endif
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct module mime_module = struct_module(
|
|
|
|
/* name: */ N_("MIME"),
|
|
|
|
/* options: */ mime_options,
|
|
|
|
/* hooks: */ NULL,
|
|
|
|
/* submodules: */ mime_submodules,
|
|
|
|
/* data: */ NULL,
|
|
|
|
/* init: */ NULL,
|
|
|
|
/* done: */ NULL
|
|
|
|
);
|