2008-02-11 09:30:14 -05:00
|
|
|
/* deflate/gzip encoding backend */
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_ZLIB_H
|
|
|
|
#include <zlib.h>
|
|
|
|
#endif
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
#include "elinks.h"
|
|
|
|
|
|
|
|
#include "encoding/deflate.h"
|
|
|
|
#include "encoding/encoding.h"
|
|
|
|
#include "util/memory.h"
|
|
|
|
|
|
|
|
/* Capacity, in bytes, of the staging buffer that receives compressed
 * input from the file descriptor before it is passed to inflate(). */
#define ELINKS_DEFLATE_BUFFER_LENGTH 5000
|
|
|
|
|
|
|
|
struct deflate_enc_data {
|
|
|
|
z_stream deflate_stream;
|
|
|
|
|
|
|
|
/* The file descriptor from which we read. */
|
|
|
|
int fdread;
|
|
|
|
|
|
|
|
unsigned int last_read:1;
|
2008-07-20 06:01:49 -04:00
|
|
|
unsigned int after_first_read:1;
|
2010-09-24 10:12:35 -04:00
|
|
|
unsigned int after_end:1;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
|
|
|
/* A buffer for data that has been read from the file but not
|
|
|
|
* yet decompressed. z_stream.next_in and z_stream.avail_in
|
|
|
|
* refer to this buffer. */
|
|
|
|
unsigned char buf[ELINKS_DEFLATE_BUFFER_LENGTH];
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
2008-02-26 17:41:26 -05:00
|
|
|
deflate_open(int window_size, struct stream_encoded *stream, int fd)
|
2008-02-11 09:30:14 -05:00
|
|
|
{
|
|
|
|
/* A zero-initialized z_stream. The compiler ensures that all
|
|
|
|
* pointer members in it are null. (Can't do this with memset
|
|
|
|
* because C99 does not require all-bits-zero to be a null
|
|
|
|
* pointer.) */
|
|
|
|
static const z_stream null_z_stream = {0};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
struct deflate_enc_data *data = mem_alloc(sizeof(*data));
|
|
|
|
|
|
|
|
stream->data = NULL;
|
|
|
|
if (!data) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize all members of *data, except data->buf[], which
|
|
|
|
* will be initialized on demand by deflate_read. */
|
|
|
|
copy_struct(&data->deflate_stream, &null_z_stream);
|
|
|
|
data->fdread = fd;
|
|
|
|
data->last_read = 0;
|
2008-07-20 06:01:49 -04:00
|
|
|
data->after_first_read = 0;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
2008-02-26 17:41:26 -05:00
|
|
|
err = inflateInit2(&data->deflate_stream, window_size);
|
2008-02-11 09:30:14 -05:00
|
|
|
if (err != Z_OK) {
|
|
|
|
mem_free(data);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
stream->data = data;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-07-20 06:01:49 -04:00
|
|
|
#if 0
/* Compiled out.  Opens @stream on @fd for raw DEFLATE data, i.e. with
 * neither a zlib nor a gzip header. */
static int
deflate_raw_open(struct stream_encoded *stream, int fd)
{
	const int raw_window_bits = -MAX_WBITS;

	return deflate_open(raw_window_bits, stream, fd);
}
#endif
|
2008-02-26 17:41:26 -05:00
|
|
|
|
|
|
|
static int
|
|
|
|
deflate_gzip_open(struct stream_encoded *stream, int fd)
|
|
|
|
{
|
|
|
|
/* detect gzip header, else assume zlib header */
|
|
|
|
return deflate_open(MAX_WBITS + 32, stream, fd);
|
|
|
|
}
|
|
|
|
|
2008-02-11 09:30:14 -05:00
|
|
|
static int
|
|
|
|
deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
|
|
|
|
{
|
|
|
|
struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
|
|
|
|
int err = 0;
|
2008-07-20 07:32:56 -04:00
|
|
|
int l = 0;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
|
|
|
if (!data) return -1;
|
|
|
|
|
|
|
|
assert(len > 0);
|
|
|
|
|
|
|
|
if (data->last_read) return 0;
|
|
|
|
|
|
|
|
data->deflate_stream.avail_out = len;
|
|
|
|
data->deflate_stream.next_out = buf;
|
|
|
|
|
|
|
|
do {
|
|
|
|
if (data->deflate_stream.avail_in == 0) {
|
2008-07-20 06:01:49 -04:00
|
|
|
l = safe_read(data->fdread, data->buf,
|
2008-02-11 09:30:14 -05:00
|
|
|
ELINKS_DEFLATE_BUFFER_LENGTH);
|
|
|
|
|
|
|
|
if (l == -1) {
|
|
|
|
if (errno == EAGAIN)
|
|
|
|
break;
|
|
|
|
else
|
|
|
|
return -1; /* I/O error */
|
|
|
|
} else if (l == 0) {
|
|
|
|
/* EOF. It is error: we wait for more bytes */
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
data->deflate_stream.next_in = data->buf;
|
|
|
|
data->deflate_stream.avail_in = l;
|
|
|
|
}
|
2008-07-20 06:01:49 -04:00
|
|
|
restart:
|
2008-02-11 09:30:14 -05:00
|
|
|
err = inflate(&data->deflate_stream, Z_SYNC_FLUSH);
|
2009-02-21 06:27:01 -05:00
|
|
|
if (err == Z_DATA_ERROR && !data->after_first_read
|
|
|
|
&& data->deflate_stream.next_out == buf) {
|
2008-07-20 07:30:18 -04:00
|
|
|
/* RFC 2616 requires a zlib header for
|
|
|
|
* "Content-Encoding: deflate", but some HTTP
|
|
|
|
* servers (Microsoft-IIS/6.0 at blogs.msdn.com,
|
|
|
|
* and reportedly Apache with mod_deflate) omit
|
|
|
|
* that, causing Z_DATA_ERROR. Clarification of
|
|
|
|
* the term "deflate" has been requested for the
|
|
|
|
* next version of HTTP:
|
|
|
|
* http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73
|
|
|
|
*
|
|
|
|
* Try to recover by telling zlib not to expect
|
|
|
|
* the header. If the error does not happen on
|
|
|
|
* the first inflate() call, then it is too late
|
|
|
|
* to recover because ELinks may already have
|
|
|
|
* discarded part of the input data.
|
|
|
|
*
|
|
|
|
* TODO: This fallback to raw DEFLATE is currently
|
|
|
|
* enabled for "Content-Encoding: gzip" too. It
|
|
|
|
* might be better to fall back to no compression
|
|
|
|
* at all, because Apache can send that header for
|
|
|
|
* uncompressed *.gz.md5 files. */
|
2008-07-20 06:01:49 -04:00
|
|
|
data->after_first_read = 1;
|
|
|
|
inflateEnd(&data->deflate_stream);
|
|
|
|
data->deflate_stream.avail_out = len;
|
|
|
|
data->deflate_stream.next_out = buf;
|
|
|
|
data->deflate_stream.next_in = data->buf;
|
|
|
|
data->deflate_stream.avail_in = l;
|
|
|
|
err = inflateInit2(&data->deflate_stream, -MAX_WBITS);
|
|
|
|
if (err == Z_OK) goto restart;
|
|
|
|
}
|
|
|
|
data->after_first_read = 1;
|
2008-02-11 09:30:14 -05:00
|
|
|
if (err == Z_STREAM_END) {
|
|
|
|
data->last_read = 1;
|
|
|
|
break;
|
|
|
|
} else if (err != Z_OK) {
|
2009-02-21 06:27:01 -05:00
|
|
|
data->last_read = 1;
|
|
|
|
break;
|
2008-02-11 09:30:14 -05:00
|
|
|
}
|
|
|
|
} while (data->deflate_stream.avail_out > 0);
|
|
|
|
|
|
|
|
assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf);
|
|
|
|
return len - data->deflate_stream.avail_out;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned char *
|
2010-09-24 10:12:35 -04:00
|
|
|
deflate_decode_buffer(struct stream_encoded *st, int window_size, unsigned char *data, int len, int *new_len)
|
2008-02-11 09:30:14 -05:00
|
|
|
{
|
2010-09-24 10:12:35 -04:00
|
|
|
struct deflate_enc_data *enc_data = (struct deflate_enc_data *) st->data;
|
|
|
|
z_stream *stream = &enc_data->deflate_stream;
|
2008-02-11 09:30:14 -05:00
|
|
|
unsigned char *buffer = NULL;
|
|
|
|
int error;
|
|
|
|
|
2008-02-17 11:55:41 -05:00
|
|
|
*new_len = 0; /* default, left there if an error occurs */
|
|
|
|
|
2008-02-11 09:30:14 -05:00
|
|
|
if (!len) return NULL;
|
2010-09-24 10:12:35 -04:00
|
|
|
stream->next_in = data;
|
|
|
|
stream->avail_in = len;
|
|
|
|
stream->total_out = 0;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
|
|
|
do {
|
|
|
|
unsigned char *new_buffer;
|
2010-09-24 10:12:35 -04:00
|
|
|
size_t size = stream->total_out + MAX_STR_LEN;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
|
|
|
new_buffer = mem_realloc(buffer, size);
|
|
|
|
if (!new_buffer) {
|
|
|
|
error = Z_MEM_ERROR;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer = new_buffer;
|
2010-09-24 10:12:35 -04:00
|
|
|
stream->next_out = buffer + stream->total_out;
|
|
|
|
stream->avail_out = MAX_STR_LEN;
|
2008-02-11 09:30:14 -05:00
|
|
|
|
2010-09-24 10:12:35 -04:00
|
|
|
error = inflate(stream, Z_SYNC_FLUSH);
|
2008-02-11 09:30:14 -05:00
|
|
|
if (error == Z_STREAM_END) {
|
|
|
|
break;
|
|
|
|
}
|
2010-09-24 10:12:35 -04:00
|
|
|
} while (error == Z_OK && stream->avail_in > 0);
|
2008-02-11 09:30:14 -05:00
|
|
|
|
2010-09-24 10:12:35 -04:00
|
|
|
if (error == Z_STREAM_END) {
|
|
|
|
inflateEnd(stream);
|
|
|
|
enc_data->after_end = 1;
|
|
|
|
error = Z_OK;
|
|
|
|
}
|
2008-02-11 09:30:14 -05:00
|
|
|
|
2008-02-17 11:55:41 -05:00
|
|
|
if (error == Z_OK) {
|
2010-09-24 10:12:35 -04:00
|
|
|
*new_len = stream->total_out;
|
2008-02-17 11:55:41 -05:00
|
|
|
return buffer;
|
|
|
|
} else {
|
2008-02-11 09:30:14 -05:00
|
|
|
if (buffer) mem_free(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-02-26 17:41:26 -05:00
|
|
|
static unsigned char *
|
2010-09-24 10:12:35 -04:00
|
|
|
deflate_raw_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
|
2008-02-26 17:41:26 -05:00
|
|
|
{
|
|
|
|
/* raw DEFLATE with neither zlib nor gzip header */
|
2010-09-24 10:12:35 -04:00
|
|
|
return deflate_decode_buffer(st, -MAX_WBITS, data, len, new_len);
|
2008-02-26 17:41:26 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned char *
|
2010-09-24 10:12:35 -04:00
|
|
|
deflate_gzip_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
|
2008-02-26 17:41:26 -05:00
|
|
|
{
|
|
|
|
/* detect gzip header, else assume zlib header */
|
2010-09-24 10:12:35 -04:00
|
|
|
return deflate_decode_buffer(st, MAX_WBITS + 32, data, len, new_len);
|
2008-02-26 17:41:26 -05:00
|
|
|
}
|
|
|
|
|
2008-02-11 09:30:14 -05:00
|
|
|
static void
|
|
|
|
deflate_close(struct stream_encoded *stream)
|
|
|
|
{
|
|
|
|
struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
|
|
|
|
|
|
|
|
if (data) {
|
2010-09-24 10:12:35 -04:00
|
|
|
if (!data->after_end) {
|
|
|
|
inflateEnd(&data->deflate_stream);
|
|
|
|
}
|
|
|
|
if (data->fdread != -1) {
|
|
|
|
close(data->fdread);
|
|
|
|
}
|
2008-02-11 09:30:14 -05:00
|
|
|
mem_free(data);
|
|
|
|
stream->data = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* File name extensions for the "deflate" backend: none, only the
 * terminating NULL. */
static const unsigned char *const deflate_extensions[] = {
	NULL
};
|
|
|
|
|
|
|
|
const struct decoding_backend deflate_decoding_backend = {
|
|
|
|
"deflate",
|
|
|
|
deflate_extensions,
|
2008-07-20 06:01:49 -04:00
|
|
|
deflate_gzip_open,
|
2008-02-11 09:30:14 -05:00
|
|
|
deflate_read,
|
2008-02-26 17:41:26 -05:00
|
|
|
deflate_raw_decode_buffer,
|
2008-02-11 09:30:14 -05:00
|
|
|
deflate_close,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* File name extensions for the "gzip" backend, NULL-terminated. */
static const unsigned char *const gzip_extensions[] = {
	".gz",
	".tgz",
	NULL
};
|
|
|
|
|
|
|
|
const struct decoding_backend gzip_decoding_backend = {
|
|
|
|
"gzip",
|
|
|
|
gzip_extensions,
|
2008-02-26 17:41:26 -05:00
|
|
|
deflate_gzip_open,
|
2008-02-11 09:30:14 -05:00
|
|
|
deflate_read,
|
2008-02-26 17:41:26 -05:00
|
|
|
deflate_gzip_decode_buffer,
|
2008-02-11 09:30:14 -05:00
|
|
|
deflate_close,
|
|
|
|
};
|