2005-09-15 09:58:31 -04:00
|
|
|
/* The "data" URI protocol implementation (RFC 2397) */
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "elinks.h"
|
|
|
|
|
|
|
|
#include "cache/cache.h"
|
|
|
|
#include "network/connection.h"
|
|
|
|
#include "protocol/data.h"
|
|
|
|
#include "protocol/protocol.h"
|
|
|
|
#include "protocol/uri.h"
|
|
|
|
#include "util/base64.h"
|
|
|
|
#include "util/string.h"
|
|
|
|
|
|
|
|
/* The URLs are of the form:
|
|
|
|
*
|
|
|
|
* data:[<mediatype>][;base64],<data>
|
|
|
|
*
|
|
|
|
* The <mediatype> is an Internet media type specification (with optional
|
|
|
|
* parameters.) The appearance of ";base64" means that the data is encoded as
|
|
|
|
* base64. Without ";base64", the data (as a sequence of octets) is represented
|
|
|
|
* using ASCII encoding for octets inside the range of safe URL characters and
|
|
|
|
* using the standard %xx hex encoding of URLs for octets outside that range.
|
|
|
|
* If <mediatype> is omitted, it defaults to "text/plain;charset=US-ASCII". As a
|
|
|
|
* shorthand, "text/plain" can be omitted but the charset parameter supplied.
|
|
|
|
*
|
|
|
|
* The syntax:
|
|
|
|
*
|
|
|
|
* dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
|
|
|
* mediatype := [ type "/" subtype ] *( ";" parameter )
|
|
|
|
* data := *urlchar
|
|
|
|
* parameter := attribute "=" value
|
|
|
|
*
|
|
|
|
* where "urlchar" is imported from [RFC2396], and "type", "subtype",
|
|
|
|
* "attribute" and "value" are the corresponding tokens from [RFC2045],
|
|
|
|
* represented using URL escaped encoding of [RFC2396] as necessary.
|
|
|
|
*
|
|
|
|
* Attribute values in [RFC2045] are allowed to be either represented as tokens
|
|
|
|
* or as quoted strings. However, within a "data" URL, the "quoted-string"
|
|
|
|
* representation would be awkward, since the quote mark is itself not a valid
|
|
|
|
* urlchar. For this reason, parameter values should use the URL Escaped
|
|
|
|
* encoding instead of quoted string if the parameter values contain any
|
|
|
|
* "tspecial".
|
|
|
|
*
|
|
|
|
* The ";base64" extension is distinguishable from a content-type parameter by
|
|
|
|
* the fact that it doesn't have a following "=" sign. */
|
|
|
|
|
|
|
|
/* FIXME: Maybe some kind of redirecting to common specialized data URI could
|
|
|
|
* be useful so "data:,blah" and "data:text/plain,blah" are redirected to the
|
|
|
|
* most specialized "data:text/plain;charset=US-ASCII,blah". On the other hand
|
|
|
|
* for small entries it doesn't matter. */
|
|
|
|
|
|
|
|
/* Media type used when the data URI does not supply one of its own. */
#define DEFAULT_DATA_MEDIATYPE "text/plain;charset=US-ASCII"
|
|
|
|
|
|
|
|
/* Non-zero when the first @headerlen bytes at @header are long enough to hold
 * a "type/subtype" pair (three bytes or more) and contain a '/'. */
#define data_has_mediatype(header, headerlen) \
	((headerlen) > 2 && memchr((header), '/', (headerlen)) != NULL)
|
|
|
|
|
|
|
|
/* Non-zero when the header part of a data URI ends with the ";base64"
 * attribute.
 *
 * @typelen is the length of the header (everything before the ',').
 * @endstr  points just past the header, i.e. at the ',' itself.
 *
 * The length check comes first so that the memcmp() never reads in front of
 * the start of the header. The macro only uses its own arguments; it does not
 * depend on the name of any variable in the calling scope. */
#define data_has_base64_attribute(typelen, endstr) \
	((typelen) >= sizeof(";base64") - 1 \
	 && !memcmp(";base64", (endstr) - sizeof(";base64") + 1, sizeof(";base64") - 1))
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
static char *
|
2005-09-15 09:58:31 -04:00
|
|
|
init_data_protocol_header(struct cache_entry *cached,
|
2022-02-21 11:52:17 -05:00
|
|
|
const char *type_, int typelen)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
2021-01-02 10:20:27 -05:00
|
|
|
char *head;
|
2022-02-21 11:52:17 -05:00
|
|
|
char *type;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
assert(typelen);
|
|
|
|
|
2022-02-21 11:52:17 -05:00
|
|
|
type = memacpy(type_, typelen);
|
2005-09-15 09:58:31 -04:00
|
|
|
if (!type) return NULL;
|
|
|
|
|
|
|
|
/* Set fake content type */
|
2007-03-11 06:59:11 -04:00
|
|
|
head = straconcat("\r\nContent-Type: ", type, "\r\n",
|
2021-01-02 10:20:27 -05:00
|
|
|
(char *) NULL);
|
2005-09-15 09:58:31 -04:00
|
|
|
mem_free(type);
|
|
|
|
if (!head) return NULL;
|
|
|
|
|
|
|
|
mem_free_set(&cached->head, head);
|
|
|
|
return head;
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
static char *
|
2005-09-15 09:58:31 -04:00
|
|
|
parse_data_protocol_header(struct connection *conn, int *base64)
|
|
|
|
{
|
|
|
|
struct uri *uri = conn->uri;
|
2022-01-26 11:35:46 -05:00
|
|
|
char *end = (char *)memchr(uri->data, ',', uri->datalen);
|
2022-02-21 11:52:17 -05:00
|
|
|
const char *type = DEFAULT_DATA_MEDIATYPE;
|
2005-09-15 09:58:31 -04:00
|
|
|
int typelen = sizeof(DEFAULT_DATA_MEDIATYPE) - 1;
|
|
|
|
|
|
|
|
if (end) {
|
|
|
|
int headerlen = end - uri->data;
|
|
|
|
|
|
|
|
if (data_has_base64_attribute(headerlen, end)) {
|
|
|
|
*base64 = 1;
|
|
|
|
headerlen -= sizeof(";base64") - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (data_has_mediatype(uri->data, headerlen)) {
|
|
|
|
type = uri->data;
|
|
|
|
typelen = headerlen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!init_data_protocol_header(conn->cached, type, typelen))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Return char after ',' or complete data part */
|
|
|
|
return end ? end + 1 : uri->data;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
data_protocol_handler(struct connection *conn)
|
|
|
|
{
|
|
|
|
struct uri *uri = conn->uri;
|
|
|
|
struct cache_entry *cached = get_cache_entry(uri);
|
2021-01-02 10:20:27 -05:00
|
|
|
char *data_start, *data = NULL;
|
2005-09-15 09:58:31 -04:00
|
|
|
int base64 = 0;
|
2020-09-15 02:32:49 -04:00
|
|
|
int decodedlen = 0;
|
|
|
|
int datalen;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (!cached) {
|
2008-08-03 08:24:26 -04:00
|
|
|
abort_connection(conn, connection_state(S_OUT_OF_MEM));
|
2005-09-15 09:58:31 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
conn->cached = cached;
|
|
|
|
|
|
|
|
data_start = parse_data_protocol_header(conn, &base64);
|
|
|
|
if (!data_start) {
|
2008-08-03 08:24:26 -04:00
|
|
|
abort_connection(conn, connection_state(S_OUT_OF_MEM));
|
2005-09-15 09:58:31 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-09-15 02:32:49 -04:00
|
|
|
datalen = uri->datalen - (data_start - uri->data);
|
2005-09-15 09:58:31 -04:00
|
|
|
if (base64) {
|
2021-01-02 10:20:27 -05:00
|
|
|
char *decoded = base64_decode_bin(data_start, datalen, &decodedlen);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
if (!decoded) {
|
2008-08-03 08:24:26 -04:00
|
|
|
abort_connection(conn, connection_state(S_OUT_OF_MEM));
|
2005-09-15 09:58:31 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
mem_free_set(&data, decoded);
|
|
|
|
} else {
|
2020-09-19 13:40:38 -04:00
|
|
|
/* Allocate the data string because URI decoding will possibly modify
|
|
|
|
* it. */
|
2020-09-19 13:30:47 -04:00
|
|
|
data = memacpy(data_start, datalen);
|
|
|
|
|
|
|
|
if (!data) {
|
|
|
|
abort_connection(conn, connection_state(S_OUT_OF_MEM));
|
|
|
|
return;
|
|
|
|
}
|
2005-09-15 09:58:31 -04:00
|
|
|
decode_uri(data);
|
|
|
|
/* Use strlen() to get the correct decoded length */
|
2020-09-15 02:32:49 -04:00
|
|
|
decodedlen = strlen(data);
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
2020-09-15 02:32:49 -04:00
|
|
|
add_fragment(cached, conn->from, data, decodedlen);
|
|
|
|
conn->from += decodedlen;
|
2005-09-15 09:58:31 -04:00
|
|
|
mem_free(data);
|
|
|
|
|
2008-08-03 08:24:26 -04:00
|
|
|
abort_connection(conn, connection_state(S_OK));
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|