From 90086599da8d19d02daff7557cd4c4e11e8e05ca Mon Sep 17 00:00:00 2001
From: Witold Filipczyk
Date: Sun, 14 Jul 2019 18:54:39 +0200
Subject: [PATCH] [encoding] Experimental zstd encoding

---
 Makefile.config.in       |   1 +
 configure.ac             |   3 +
 src/encoding/Makefile    |   1 +
 src/encoding/encoding.c  |   2 +
 src/encoding/encoding.h  |   1 +
 src/encoding/zstd.c      | 230 +++++++++++++++++++++++++++++++++++++++
 src/encoding/zstd.h      |  12 +++
 src/main/version.c       |   3 +
 src/protocol/http/http.c |  16 +++-
 9 files changed, 267 insertions(+), 2 deletions(-)
 create mode 100644 src/encoding/zstd.c
 create mode 100644 src/encoding/zstd.h

diff --git a/Makefile.config.in b/Makefile.config.in
index e0f61c2d..9ab1844e 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -177,6 +177,7 @@ CONFIG_UTF8 = @CONFIG_UTF8@
 CONFIG_XBEL_BOOKMARKS = @CONFIG_XBEL_BOOKMARKS@
 CONFIG_XMLTO = @CONFIG_XMLTO@
 CONFIG_GSSAPI = @CONFIG_GSSAPI@
+CONFIG_ZSTD = @CONFIG_ZSTD@
 
 DEFS = @DEFS@
 CFLAGS = @CFLAGS@
diff --git a/configure.ac b/configure.ac
index 946984b5..63c8ba6a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -491,6 +491,9 @@ EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_BZIP2, bzlib, bzlib.h, bz2, BZ2_bzReadOpen,
 EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_BROTLI, brotli, brotli/decode.h, brotlidec, BrotliDecoderDecompressStream,
 	[  --with-brotli          enable experimental brotli support])
 
+EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_ZSTD, zstd, zstd.h, zstd, ZSTD_createDCtx,
+	[  --with-zstd          enable experimental zstd support])
+
 EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_IDN, idn, idna.h, idn, stringprep_check_version,
 	[  --without-idn           disable international domain names support])
 
diff --git a/src/encoding/Makefile b/src/encoding/Makefile
index 0615bae9..d4266447 100644
--- a/src/encoding/Makefile
+++ b/src/encoding/Makefile
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_BROTLI)	+= brotli.o
 OBJS-$(CONFIG_BZIP2)	+= bzip2.o
 OBJS-$(CONFIG_GZIP)	+= deflate.o
 OBJS-$(CONFIG_LZMA)	+= lzma.o
+OBJS-$(CONFIG_ZSTD)	+= zstd.o
 
 OBJS = encoding.o
 
diff --git a/src/encoding/encoding.c b/src/encoding/encoding.c
index d072da7f..fe2cdb28 100644
--- a/src/encoding/encoding.c
+++ b/src/encoding/encoding.c
@@ -87,6 +87,7 @@ static const struct decoding_backend dummy_decoding_backend = {
 #include "encoding/bzip2.h"
 #include "encoding/deflate.h"
 #include "encoding/lzma.h"
+#include "encoding/zstd.h"
 
 static const struct decoding_backend *const decoding_backends[] = {
 	&dummy_decoding_backend,
@@ -95,6 +96,7 @@ static const struct decoding_backend *const decoding_backends[] = {
 	&lzma_decoding_backend,
 	&deflate_decoding_backend,
 	&brotli_decoding_backend,
+	&zstd_decoding_backend,
 };
 
 
diff --git a/src/encoding/encoding.h b/src/encoding/encoding.h
index f196c2c0..2eee7d71 100644
--- a/src/encoding/encoding.h
+++ b/src/encoding/encoding.h
@@ -11,6 +11,7 @@ enum stream_encoding {
 	ENCODING_LZMA,
 	ENCODING_DEFLATE,
 	ENCODING_BROTLI,
+	ENCODING_ZSTD,
 
 	/* Max. number of known encoding including ENCODING_NONE. */
 	ENCODINGS_KNOWN,
diff --git a/src/encoding/zstd.c b/src/encoding/zstd.c
new file mode 100644
index 00000000..2862b1a0
--- /dev/null
+++ b/src/encoding/zstd.c
@@ -0,0 +1,230 @@
+/* zstd encoding (ENCODING_ZSTD) backend */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_ZSTD_H
+#include <zstd.h>
+#endif
+#include <errno.h>
+
+#include "elinks.h"
+
+#include "encoding/zstd.h"
+#include "encoding/encoding.h"
+#include "util/memory.h"
+
+/* How many bytes of compressed data zstd_read() reads at once, and by
+ * how many bytes zstd_decode_buffer() grows its output buffer. */
+#define ELINKS_ZSTD_BUFFER_LENGTH 16384
+
+/* Decoder state of one stream, kept in stream_encoded.data. */
+struct zstd_enc_data {
+	ZSTD_DCtx *zstd_stream;
+	ZSTD_inBuffer input;
+	ZSTD_outBuffer output;
+	/* The file descriptor from which we read.  */
+	int fdread;
+	/* Set by zstd_read() once a frame is complete and no input is
+	 * left; later calls then return 0.  Unsigned, because a signed
+	 * one-bit field cannot portably hold the value 1. */
+	unsigned int last_read:1;
+
+	/* A buffer for data that has been read from the file but not
+	 * yet decompressed.  In zstd_read(), input.src points here and
+	 * input.pos .. input.size is the part not yet consumed. */
+	unsigned char buf[ELINKS_ZSTD_BUFFER_LENGTH];
+};
+
+/* Allocate the decoder state for @fd and attach it to @stream.
+ * Returns 0 on success; -1, with stream->data left NULL, on failure. */
+static int
+zstd_open(struct stream_encoded *stream, int fd)
+{
+	struct zstd_enc_data *data = mem_calloc(1, sizeof(*data));
+
+	stream->data = NULL;
+	if (!data) {
+		return -1;
+	}
+
+	data->fdread = fd;
+	data->zstd_stream = ZSTD_createDCtx();
+
+	if (!data->zstd_stream) {
+		mem_free(data);
+		return -1;
+	}
+
+	stream->data = data;
+
+	return 0;
+}
+
+/* Decompress at most @len bytes from the stream's descriptor into @buf.
+ * Returns the number of bytes stored; 0 at the end of the compressed
+ * data or when a non-blocking descriptor has nothing to offer yet; -1
+ * on an I/O error, on corrupt data or if the data ends mid-frame. */
+static int
+zstd_read(struct stream_encoded *stream, unsigned char *buf, int len)
+{
+	struct zstd_enc_data *data = (struct zstd_enc_data *) stream->data;
+	/* Whether the last ZSTD_decompressStream() call of this invocation
+	 * completed a frame.  Stays 0 if we leave before decoding anything
+	 * (EAGAIN), so that is never mistaken for the end of the data. */
+	int frame_done = 0;
+
+	if (!data) return -1;
+
+	assert(len > 0);
+
+	if (data->last_read) {
+		return 0;
+	}
+
+	data->output.size = len;
+	data->output.dst = buf;
+	data->output.pos = 0;
+
+	do {
+		/* size_t, as returned by zstd; an int could truncate it. */
+		size_t ret;
+		int eof = 0;
+
+		if (data->output.pos == data->output.size) {
+			break;
+		}
+		if (data->input.pos == data->input.size) {
+			int l = safe_read(data->fdread, data->buf,
+					  ELINKS_ZSTD_BUFFER_LENGTH);
+
+			if (l == -1) {
+				if (errno == EAGAIN)
+					break;
+				else
+					return -1; /* I/O error */
+			}
+
+			/* At EOF the decoder still runs once, on empty
+			 * input: it may hold decoded data that did not
+			 * fit into @buf during the previous call. */
+			eof = (l == 0);
+			data->input.src = data->buf;
+			data->input.size = l;
+			data->input.pos = 0;
+		}
+
+		ret = ZSTD_decompressStream(data->zstd_stream, &data->output, &data->input);
+
+		if (ZSTD_isError(ret)) {
+			/* Corrupt data must not look like a short read. */
+			return -1;
+		}
+
+		/* 0 means the frame is fully decoded and flushed. */
+		frame_done = (ret == 0);
+
+		if (eof) {
+			if (!frame_done && data->output.pos == 0) {
+				/* EOF. It is error: we wait for more bytes */
+				return -1;
+			}
+			break;
+		}
+	} while (data->input.pos < data->input.size);
+
+	/* Input left over after a frame belongs to a following frame and
+	 * is decoded by the next call. */
+	if (frame_done && data->input.pos == data->input.size) {
+		data->last_read = 1;
+	}
+
+	return data->output.pos;
+}
+
+/* Decompress the @len bytes at @data in one go.  Returns a buffer
+ * allocated with mem_realloc() and stores its length in *@new_len.
+ * Returns NULL, with *@new_len == 0, if the data is corrupt or nothing
+ * could be allocated.  If memory runs out later, what has been decoded
+ * up to then is returned. */
+static unsigned char *
+zstd_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
+{
+	struct zstd_enc_data *enc_data = (struct zstd_enc_data *)st->data;
+
+	*new_len = 0; /* default, left there if an error occurs */
+
+	if (!enc_data) return NULL;
+
+	enc_data->input.src = data;
+	enc_data->input.pos = 0;
+	enc_data->input.size = len;
+	enc_data->output.pos = 0;
+	enc_data->output.size = 0;
+	enc_data->output.dst = NULL;
+
+	/* Keep going while input is left, but also while the output buffer
+	 * is completely full: the decoder may then hold more decoded data
+	 * although it has already consumed all of the input. */
+	do {
+		unsigned char *new_buffer;
+		size_t size = enc_data->output.size + ELINKS_ZSTD_BUFFER_LENGTH;
+		size_t ret;
+
+		new_buffer = mem_realloc(enc_data->output.dst, size);
+		if (!new_buffer) {
+			break;
+		}
+
+		enc_data->output.dst = new_buffer;
+		enc_data->output.size = size;
+
+		ret = ZSTD_decompressStream(enc_data->zstd_stream, &enc_data->output, &enc_data->input);
+
+		if (ZSTD_isError(ret)) {
+			mem_free_if(enc_data->output.dst);
+			enc_data->output.dst = NULL;
+			return NULL;
+		}
+	} while (enc_data->input.pos < enc_data->input.size
+		 || enc_data->output.pos == enc_data->output.size);
+
+	*new_len = enc_data->output.pos;
+	return enc_data->output.dst;
+}
+
+/* Free the decoder state of @stream and close its file descriptor. */
+static void
+zstd_close(struct stream_encoded *stream)
+{
+	struct zstd_enc_data *data = (struct zstd_enc_data *) stream->data;
+
+	if (data) {
+		if (data->zstd_stream) {
+			ZSTD_freeDCtx(data->zstd_stream);
+			data->zstd_stream = NULL;
+		}
+		if (data->fdread != -1) {
+			close(data->fdread);
+		}
+		mem_free(data);
+		stream->data = NULL;
+	}
+}
+
+static const unsigned char *const zstd_extensions[] = { ".zst", NULL };
+
+const struct decoding_backend zstd_decoding_backend = {
+	"zstd",
+	zstd_extensions,
+	zstd_open,
+	zstd_read,
+	zstd_decode_buffer,
+	zstd_close,
+};
diff --git a/src/encoding/zstd.h b/src/encoding/zstd.h
new file mode 100644
index 00000000..12aefb59
--- /dev/null
+++ b/src/encoding/zstd.h
@@ -0,0 +1,12 @@
+#ifndef EL__ENCODING_ZSTD_H
+#define EL__ENCODING_ZSTD_H
+
+#include "encoding/encoding.h"
+
+#ifdef CONFIG_ZSTD
+extern const struct decoding_backend zstd_decoding_backend;
+#else
+#define zstd_decoding_backend dummy_decoding_backend
+#endif
+
+#endif
diff --git a/src/main/version.c b/src/main/version.c
index 84953f57..4e5eabb7 100644
--- a/src/main/version.c
+++ b/src/main/version.c
@@ -143,6 +143,9 @@ get_dyn_full_version(struct terminal *term, int more)
 #ifdef CONFIG_LZMA
 		comma, "lzma",
 #endif
+#ifdef CONFIG_ZSTD
+		comma, "zstd",
+#endif
 #ifndef CONFIG_MOUSE
 		comma, _("No mouse", term),
 #endif
diff --git a/src/protocol/http/http.c b/src/protocol/http/http.c
index 1df90332..bd2d5a5f 100644
--- a/src/protocol/http/http.c
+++ b/src/protocol/http/http.c
@@ -588,12 +588,18 @@ init_http_connection_info(struct connection *conn, int major, int minor, int clo
 static void
 accept_encoding_header(struct string *header)
 {
-#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI)
+#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI) || defined(CONFIG_ZSTD)
 	int comma = 0;
 
 	add_to_string(header, "Accept-Encoding: ");
 
+#ifdef CONFIG_ZSTD
+	add_to_string(header, "zstd");
+	comma = 1;
+#endif
+
 #ifdef CONFIG_BROTLI
+	if (comma) add_to_string(header, ", ");
 	add_to_string(header, "br");
 	comma = 1;
 #endif
@@ -1864,7 +1870,7 @@ again:
 
 	d = parse_header(conn->cached->head, "Content-Encoding", NULL);
 	if (d) {
-#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI)
+#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI) || defined(CONFIG_ZSTD)
 		unsigned char *extension = get_extension_from_uri(uri);
 		enum stream_encoding file_encoding;
 
@@ -1882,6 +1888,12 @@ again:
 			conn->content_encoding = ENCODING_DEFLATE;
 #endif
 
+#ifdef CONFIG_ZSTD
+		if (file_encoding != ENCODING_ZSTD
+		    && (!c_strcasecmp(d, "zstd")))
+			conn->content_encoding = ENCODING_ZSTD;
+#endif
+
 #ifdef CONFIG_BROTLI
 		if (file_encoding != ENCODING_BROTLI
 		    && (!c_strcasecmp(d, "br")))