1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-10-04 04:14:18 -04:00
elinks/src/encoding/deflate.c
2015-08-22 19:28:50 +02:00

288 lines
7.2 KiB
C

/* deflate/gzip encoding backend */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include <errno.h>
#include "elinks.h"
#include "encoding/deflate.h"
#include "encoding/encoding.h"
#include "util/memory.h"
/* Size, in bytes, of the staging buffer that holds compressed input
 * read from the file descriptor until inflate() consumes it. */
#define ELINKS_DEFLATE_BUFFER_LENGTH 5000

/* Per-stream decoder state; stored in stream_encoded.data. */
struct deflate_enc_data {
	/* zlib decompression state. */
	z_stream deflate_stream;

	/* Descriptor that deflate_read() pulls compressed bytes from;
	 * deflate_close() closes it unless it is -1. */
	int fdread;

	/* Set by deflate_read() once inflate() reported end of stream
	 * or an error; later deflate_read() calls then return 0. */
	unsigned last_read : 1;

	/* Once set, the decoders no longer attempt the fallback to
	 * header-less (raw) DEFLATE after a Z_DATA_ERROR. */
	unsigned after_first_read : 1;

	/* Set by deflate_decode_buffer() after it has already called
	 * inflateEnd(), so that deflate_close() does not repeat it. */
	unsigned after_end : 1;

	/* Compressed bytes that were read but not yet decompressed.
	 * deflate_read() points z_stream.next_in / z_stream.avail_in
	 * at this array. */
	unsigned char buf[ELINKS_DEFLATE_BUFFER_LENGTH];
};
/* Allocate and initialize the decoder state for @stream.
 *
 * @window_size: when positive it is handed to inflateInit2(); any
 *               other value makes this function call inflateInit()
 *               instead.
 * @stream:      receives the new state in stream->data; it is left
 *               NULL if anything fails.
 * @fd:          descriptor that deflate_read() will read from.
 *
 * Returns 0 on success and -1 if the allocation or the zlib
 * initialization failed. */
static int
deflate_open(int window_size, struct stream_encoded *stream, int fd)
{
	/* Static storage guarantees that every pointer member of this
	 * template is a null pointer.  memset() would not, because C99
	 * does not promise that all-bits-zero represents a null
	 * pointer. */
	static const z_stream null_z_stream = {0};
	struct deflate_enc_data *enc;
	int zret;

	enc = mem_alloc(sizeof(*enc));
	stream->data = NULL;
	if (!enc) return -1;

	/* Everything except enc->buf[] is set up here; deflate_read()
	 * fills the buffer when it first needs input. */
	copy_struct(&enc->deflate_stream, &null_z_stream);
	enc->fdread = fd;
	enc->last_read = 0;
	enc->after_first_read = 0;
	enc->after_end = 0;

	zret = (window_size > 0)
		? inflateInit2(&enc->deflate_stream, window_size)
		: inflateInit(&enc->deflate_stream);

	if (zret != Z_OK) {
		mem_free(enc);
		return -1;
	}

	stream->data = enc;
	return 0;
}
/* Open callback of the "deflate" backend.
 *
 * Requests -MAX_WBITS, zlib's notation for raw DEFLATE data that has
 * neither a zlib nor a gzip header.  Note that deflate_open() forwards
 * only positive window sizes to inflateInit2(); for this non-positive
 * value it calls inflateInit().
 *
 * Returns whatever deflate_open() returns. */
static int
deflate_raw_open(struct stream_encoded *stream, int fd)
{
	const int raw_window_bits = -MAX_WBITS;

	return deflate_open(raw_window_bits, stream, fd);
}
/* Open callback of the "gzip" backend.
 *
 * Adding 32 to the window bits asks zlib to look for a gzip header
 * and to assume a zlib header when there is none.
 *
 * Returns whatever deflate_open() returns. */
static int
deflate_gzip_open(struct stream_encoded *stream, int fd)
{
	const int autodetect_window_bits = MAX_WBITS + 32;

	return deflate_open(autodetect_window_bits, stream, fd);
}
static int
deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
{
struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
int err = 0;
int l = 0;
if (!data) return -1;
assert(len > 0);
if (data->last_read) return 0;
data->deflate_stream.avail_out = len;
data->deflate_stream.next_out = buf;
do {
if (data->deflate_stream.avail_in == 0) {
l = safe_read(data->fdread, data->buf,
ELINKS_DEFLATE_BUFFER_LENGTH);
if (l == -1) {
if (errno == EAGAIN)
break;
else
return -1; /* I/O error */
} else if (l == 0) {
/* EOF. It is error: we wait for more bytes */
return -1;
}
data->deflate_stream.next_in = data->buf;
data->deflate_stream.avail_in = l;
}
restart:
err = inflate(&data->deflate_stream, Z_SYNC_FLUSH);
if (err == Z_DATA_ERROR && !data->after_first_read
&& data->deflate_stream.next_out == buf) {
/* RFC 2616 requires a zlib header for
* "Content-Encoding: deflate", but some HTTP
* servers (Microsoft-IIS/6.0 at blogs.msdn.com,
* and reportedly Apache with mod_deflate) omit
* that, causing Z_DATA_ERROR. Clarification of
* the term "deflate" has been requested for the
* next version of HTTP:
* http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73
*
* Try to recover by telling zlib not to expect
* the header. If the error does not happen on
* the first inflate() call, then it is too late
* to recover because ELinks may already have
* discarded part of the input data.
*
* TODO: This fallback to raw DEFLATE is currently
* enabled for "Content-Encoding: gzip" too. It
* might be better to fall back to no compression
* at all, because Apache can send that header for
* uncompressed *.gz.md5 files. */
data->after_first_read = 1;
inflateEnd(&data->deflate_stream);
data->deflate_stream.avail_out = len;
data->deflate_stream.next_out = buf;
data->deflate_stream.next_in = data->buf;
data->deflate_stream.avail_in = l;
err = inflateInit2(&data->deflate_stream, -MAX_WBITS);
if (err == Z_OK) goto restart;
}
data->after_first_read = 1;
if (err == Z_STREAM_END) {
data->last_read = 1;
break;
} else if (err != Z_OK) {
data->last_read = 1;
break;
}
} while (data->deflate_stream.avail_out > 0);
assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf);
return len - data->deflate_stream.avail_out;
}
static unsigned char *
deflate_decode_buffer(struct stream_encoded *st, int window_size, unsigned char *data, int len, int *new_len)
{
struct deflate_enc_data *enc_data = (struct deflate_enc_data *) st->data;
z_stream *stream = &enc_data->deflate_stream;
unsigned char *buffer = NULL;
int error;
*new_len = 0; /* default, left there if an error occurs */
if (!len) return NULL;
stream->next_in = data;
stream->avail_in = len;
stream->total_out = 0;
do {
unsigned char *new_buffer;
size_t size = stream->total_out + MAX_STR_LEN;
new_buffer = mem_realloc(buffer, size);
if (!new_buffer) {
error = Z_MEM_ERROR;
break;
}
buffer = new_buffer;
stream->next_out = buffer + stream->total_out;
stream->avail_out = MAX_STR_LEN;
restart2:
error = inflate(stream, Z_SYNC_FLUSH);
if (error == Z_STREAM_END) {
break;
}
if (error == Z_DATA_ERROR && !enc_data->after_first_read) {
(void)inflateEnd(stream);
error = inflateInit2(stream, -MAX_WBITS);
if (error == Z_OK) {
enc_data->after_first_read = 1;
stream->next_in = data;
stream->avail_in = len;
goto restart2;
}
}
} while (error == Z_OK && stream->avail_in > 0);
if (error == Z_STREAM_END) {
inflateEnd(stream);
enc_data->after_end = 1;
error = Z_OK;
}
if (error == Z_OK) {
*new_len = stream->total_out;
return buffer;
} else {
mem_free_if(buffer);
return NULL;
}
}
/* Buffer-decoding callback of the "deflate" backend.
 *
 * Forwards to deflate_decode_buffer() with -MAX_WBITS, zlib's notation
 * for raw DEFLATE data without a zlib or gzip header, and returns its
 * result unchanged. */
static unsigned char *
deflate_raw_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
{
	const int raw_window_bits = -MAX_WBITS;

	return deflate_decode_buffer(st, raw_window_bits, data, len, new_len);
}
/* Buffer-decoding callback of the "gzip" backend.
 *
 * Forwards to deflate_decode_buffer() with MAX_WBITS + 32, which in
 * zlib means "detect a gzip header, otherwise assume a zlib header",
 * and returns its result unchanged. */
static unsigned char *
deflate_gzip_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
{
	const int autodetect_window_bits = MAX_WBITS + 32;

	return deflate_decode_buffer(st, autodetect_window_bits, data, len, new_len);
}
/* Close callback shared by the "deflate" and "gzip" backends.
 *
 * Releases the zlib state (unless deflate_decode_buffer() already did
 * so and set after_end), closes the input descriptor if it is not -1,
 * frees the decoder state and clears stream->data.  Does nothing when
 * the stream has no decoder state. */
static void
deflate_close(struct stream_encoded *stream)
{
	struct deflate_enc_data *enc = (struct deflate_enc_data *) stream->data;

	if (!enc) return;

	if (!enc->after_end) inflateEnd(&enc->deflate_stream);
	if (enc->fdread != -1) close(enc->fdread);

	mem_free(enc);
	stream->data = NULL;
}
/* Extension list of the "deflate" backend: it contains nothing but
 * the NULL entry. */
static const unsigned char *const deflate_extensions[] = { NULL };
/* Backend descriptor for "deflate".  It uses the raw open and
 * decode-buffer entry points and shares deflate_read() and
 * deflate_close() with the gzip backend.
 * NOTE(review): the initializer is positional; the member order is
 * presumably name, extensions, open, read, decode_buffer, close --
 * confirm against struct decoding_backend in encoding/encoding.h. */
const struct decoding_backend deflate_decoding_backend = {
"deflate",
deflate_extensions,
deflate_raw_open,
deflate_read,
deflate_raw_decode_buffer,
deflate_close,
};
/* NULL-terminated extension list of the "gzip" backend (presumably
 * the file name suffixes that select it -- confirm against the users
 * of struct decoding_backend). */
static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
/* Backend descriptor for "gzip".  It uses the header-detecting open
 * and decode-buffer entry points and shares deflate_read() and
 * deflate_close() with the deflate backend.
 * NOTE(review): the initializer is positional; the member order is
 * presumably name, extensions, open, read, decode_buffer, close --
 * confirm against struct decoding_backend in encoding/encoding.h. */
const struct decoding_backend gzip_decoding_backend = {
"gzip",
gzip_extensions,
deflate_gzip_open,
deflate_read,
deflate_gzip_decode_buffer,
deflate_close,
};