mirror of
https://github.com/rkd77/elinks.git
synced 2025-01-03 14:57:44 -05:00
7645a836fc
straconcat reads the args with va_arg(ap, const unsigned char *), and the NULL macro may have the wrong type (e.g. int). Many places pass string literals of type char * to straconcat. This is in principle also a violation, but I'm ignoring it for now because if it becomes a problem with some C implementation, then so will the use of unsigned char * with printf "%s", which is so widespread in ELinks that I'm not going to try fixing it now.
168 lines
4.6 KiB
C
168 lines
4.6 KiB
C
/* The "data" URI protocol implementation (RFC 2397) */
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "elinks.h"
|
|
|
|
#include "cache/cache.h"
|
|
#include "network/connection.h"
|
|
#include "protocol/data.h"
|
|
#include "protocol/protocol.h"
|
|
#include "protocol/uri.h"
|
|
#include "util/base64.h"
|
|
#include "util/string.h"
|
|
|
|
/* The URLs are of the form:
|
|
*
|
|
* data:[<mediatype>][;base64],<data>
|
|
*
|
|
* The <mediatype> is an Internet media type specification (with optional
|
|
* parameters.) The appearance of ";base64" means that the data is encoded as
|
|
* base64. Without ";base64", the data (as a sequence of octets) is represented
|
|
* using ASCII encoding for octets inside the range of safe URL characters and
|
|
* using the standard %xx hex encoding of URLs for octets outside that range.
|
|
* If <mediatype> is omitted, it defaults to "text/plain;charset=US-ASCII". As a
|
|
* shorthand, "text/plain" can be omitted but the charset parameter supplied.
|
|
*
|
|
* The syntax:
|
|
*
|
|
* dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
|
* mediatype := [ type "/" subtype ] *( ";" parameter )
|
|
* data := *urlchar
|
|
* parameter := attribute "=" value
|
|
*
|
|
* where "urlchar" is imported from [RFC2396], and "type", "subtype",
|
|
* "attribute" and "value" are the corresponding tokens from [RFC2045],
|
|
* represented using URL escaped encoding of [RFC2396] as necessary.
|
|
*
|
|
* Attribute values in [RFC2045] are allowed to be either represented as tokens
|
|
* or as quoted strings. However, within a "data" URL, the "quoted-string"
|
|
* representation would be awkward, since the quote mark is itself not a valid
|
|
* urlchar. For this reason, parameter values should use the URL Escaped
|
|
* encoding instead of quoted string if the parameter values contain any
|
|
* "tspecial".
|
|
*
|
|
* The ";base64" extension is distinguishable from a content-type parameter by
|
|
* the fact that it doesn't have a following "=" sign. */
|
|
|
|
/* FIXME: Maybe some kind of redirecting to common specialized data URI could
|
|
* be useful so "data:,blah" and "data:text/plain,blah" are redirected to the
|
|
* most specialized "data:text/plain;charset=US-ASCII,blah". On the other hand
|
|
* for small entries it doesn't matter. */
|
|
|
|
/* Media type used when the data URI header does not supply one; this is the
 * default mandated by RFC 2397. */
#define DEFAULT_DATA_MEDIATYPE "text/plain;charset=US-ASCII"
|
|
|
|
/* Non-zero when the first @headerlen bytes of @header look like they carry a
 * "type/subtype" media type: at least three bytes long and containing a '/'.
 * Note that @headerlen is evaluated twice. */
#define data_has_mediatype(header, headerlen) \
	((headerlen) >= 3 && memchr((header), '/', (headerlen)) != NULL)
|
|
|
|
/* Non-zero when the header that ends right before @endstr is terminated by
 * the ";base64" attribute.
 *
 * @typelen is the number of header bytes preceding @endstr and @endstr points
 * at the ',' separating the header from the data. The length is checked first
 * so that nothing in front of the header is ever read. The offset is computed
 * as one subtraction of the attribute length so no pointer before the start
 * of the header is formed, even when the header is exactly ";base64". */
#define data_has_base64_attribute(typelen, endstr) \
	((typelen) >= sizeof(";base64") - 1 \
	 && !memcmp(";base64", (endstr) - (sizeof(";base64") - 1), \
		    sizeof(";base64") - 1))
|
|
|
|
static unsigned char *
|
|
init_data_protocol_header(struct cache_entry *cached,
|
|
unsigned char *type, int typelen)
|
|
{
|
|
unsigned char *head;
|
|
|
|
assert(typelen);
|
|
|
|
type = memacpy(type, typelen);
|
|
if (!type) return NULL;
|
|
|
|
/* Set fake content type */
|
|
head = straconcat("\r\nContent-Type: ", type, "\r\n",
|
|
(unsigned char *) NULL);
|
|
mem_free(type);
|
|
if (!head) return NULL;
|
|
|
|
mem_free_set(&cached->head, head);
|
|
return head;
|
|
}
|
|
|
|
static unsigned char *
|
|
parse_data_protocol_header(struct connection *conn, int *base64)
|
|
{
|
|
struct uri *uri = conn->uri;
|
|
unsigned char *end = memchr(uri->data, ',', uri->datalen);
|
|
unsigned char *type = DEFAULT_DATA_MEDIATYPE;
|
|
int typelen = sizeof(DEFAULT_DATA_MEDIATYPE) - 1;
|
|
|
|
if (end) {
|
|
int headerlen = end - uri->data;
|
|
|
|
if (data_has_base64_attribute(headerlen, end)) {
|
|
*base64 = 1;
|
|
headerlen -= sizeof(";base64") - 1;
|
|
}
|
|
|
|
if (data_has_mediatype(uri->data, headerlen)) {
|
|
type = uri->data;
|
|
typelen = headerlen;
|
|
}
|
|
}
|
|
|
|
if (!init_data_protocol_header(conn->cached, type, typelen))
|
|
return NULL;
|
|
|
|
/* Return char after ',' or complete data part */
|
|
return end ? end + 1 : uri->data;
|
|
}
|
|
|
|
void
|
|
data_protocol_handler(struct connection *conn)
|
|
{
|
|
struct uri *uri = conn->uri;
|
|
struct cache_entry *cached = get_cache_entry(uri);
|
|
unsigned char *data_start, *data;
|
|
int base64 = 0;
|
|
|
|
if (!cached) {
|
|
abort_connection(conn, S_OUT_OF_MEM);
|
|
return;
|
|
}
|
|
|
|
conn->cached = cached;
|
|
|
|
data_start = parse_data_protocol_header(conn, &base64);
|
|
if (!data_start) {
|
|
abort_connection(conn, S_OUT_OF_MEM);
|
|
return;
|
|
}
|
|
|
|
/* Allocate the data string because URI decoding will possibly modify
|
|
* it. */
|
|
data = memacpy(data_start, uri->datalen - (data_start - uri->data));
|
|
if (!data) {
|
|
abort_connection(conn, S_OUT_OF_MEM);
|
|
return;
|
|
}
|
|
|
|
if (base64) {
|
|
unsigned char *decoded = base64_encode(data);
|
|
|
|
if (!decoded) {
|
|
abort_connection(conn, S_OUT_OF_MEM);
|
|
return;
|
|
}
|
|
|
|
mem_free_set(&data, decoded);
|
|
} else {
|
|
decode_uri(data);
|
|
}
|
|
|
|
{
|
|
/* Use strlen() to get the correct decoded length */
|
|
int datalen = strlen(data);
|
|
|
|
add_fragment(cached, conn->from, data, datalen);
|
|
conn->from += datalen;
|
|
}
|
|
|
|
mem_free(data);
|
|
|
|
abort_connection(conn, S_OK);
|
|
}
|