1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

encoding: Fixed chunked decompression and added deflate support.

This commit is contained in:
Witold Filipczyk 2008-02-11 15:30:14 +01:00 committed by Kalle Olavi Niemitalo
parent 8e0938d2fc
commit cfb2ef63d6
7 changed files with 230 additions and 282 deletions

View File

@ -2,7 +2,7 @@ top_builddir=../..
include $(top_builddir)/Makefile.config include $(top_builddir)/Makefile.config
OBJS-$(CONFIG_BZIP2) += bzip2.o OBJS-$(CONFIG_BZIP2) += bzip2.o
OBJS-$(CONFIG_GZIP) += gzip.o OBJS-$(CONFIG_GZIP) += deflate.o
OBJS-$(CONFIG_LZMA) += lzma.o LzmaDecode.o OBJS-$(CONFIG_LZMA) += lzma.o LzmaDecode.o
OBJS = encoding.o OBJS = encoding.o

200
src/encoding/deflate.c Normal file
View File

@ -0,0 +1,200 @@
/* deflate/gzip encoding backend */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include <errno.h>
#include "elinks.h"
#include "encoding/deflate.h"
#include "encoding/encoding.h"
#include "util/memory.h"
/* How many bytes of compressed data to read before decompressing.
 * This is both the size of deflate_enc_data.buf and the largest amount
 * deflate_read asks safe_read for in one call. */
#define ELINKS_DEFLATE_BUFFER_LENGTH 5000
/* Per-stream decoder state. It is allocated by deflate_open, stored in
 * stream_encoded.data, and released by deflate_close. */
struct deflate_enc_data {
/* zlib inflate state; set up with inflateInit2 in deflate_open and
 * torn down with inflateEnd in deflate_close. */
z_stream deflate_stream;
/* The file descriptor from which we read. */
int fdread;
/* Set once inflate has reported Z_STREAM_END. From then on
 * deflate_read returns 0 without reading the descriptor again. */
unsigned int last_read:1;
/* A buffer for data that has been read from the file but not
 * yet decompressed. z_stream.next_in and z_stream.avail_in
 * refer to this buffer. */
unsigned char buf[ELINKS_DEFLATE_BUFFER_LENGTH];
};
/* Create the decoder state for @stream, reading compressed input from @fd.
 * Returns 0 on success. On failure returns -1 and leaves stream->data
 * NULL; @fd is not closed in that case. */
static int
deflate_open(struct stream_encoded *stream, int fd)
{
	/* Template whose members are all zero and whose pointer members
	 * are all null. Static initialization gets this right portably;
	 * memset would not, because C99 does not require all-bits-zero to
	 * be a null pointer. */
	static const z_stream null_z_stream = {0};
	struct deflate_enc_data *enc;

	stream->data = NULL;

	enc = mem_alloc(sizeof(*enc));
	if (!enc)
		return -1;

	/* enc->buf[] is deliberately left alone here: deflate_read fills
	 * it on demand. */
	enc->last_read = 0;
	enc->fdread = fd;
	copy_struct(&enc->deflate_stream, &null_z_stream);

	/* Adding 32 to the window bits makes zlib detect zlib and gzip
	 * headers automatically. */
	if (inflateInit2(&enc->deflate_stream, MAX_WBITS | 32) != Z_OK) {
		mem_free(enc);
		return -1;
	}

	stream->data = enc;
	return 0;
}
static int
deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
{
struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
int err = 0;
if (!data) return -1;
assert(len > 0);
if (data->last_read) return 0;
data->deflate_stream.avail_out = len;
data->deflate_stream.next_out = buf;
do {
if (data->deflate_stream.avail_in == 0) {
int l = safe_read(data->fdread, data->buf,
ELINKS_DEFLATE_BUFFER_LENGTH);
if (l == -1) {
if (errno == EAGAIN)
break;
else
return -1; /* I/O error */
} else if (l == 0) {
/* EOF. It is error: we wait for more bytes */
return -1;
}
data->deflate_stream.next_in = data->buf;
data->deflate_stream.avail_in = l;
}
err = inflate(&data->deflate_stream, Z_SYNC_FLUSH);
if (err == Z_STREAM_END) {
data->last_read = 1;
break;
} else if (err != Z_OK) {
return -1;
}
} while (data->deflate_stream.avail_out > 0);
assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf);
return len - data->deflate_stream.avail_out;
}
static unsigned char *
deflate_decode_buffer(unsigned char *data, int len, int *new_len)
{
z_stream stream;
unsigned char *buffer = NULL;
int error;
if (!len) return NULL;
memset(&stream, 0, sizeof(z_stream));
stream.next_in = data;
stream.avail_in = len;
if (inflateInit2(&stream, MAX_WBITS | 32) != Z_OK)
return NULL;
do {
unsigned char *new_buffer;
size_t size = stream.total_out + MAX_STR_LEN;
new_buffer = mem_realloc(buffer, size);
if (!new_buffer) {
error = Z_MEM_ERROR;
break;
}
buffer = new_buffer;
stream.next_out = buffer + stream.total_out;
stream.avail_out = MAX_STR_LEN;
error = inflate(&stream, Z_SYNC_FLUSH);
if (error == Z_STREAM_END) {
*new_len = stream.total_out;
error = Z_OK;
break;
}
} while (error == Z_OK && stream.avail_in > 0);
inflateEnd(&stream);
if (error != Z_OK) {
if (buffer) mem_free(buffer);
*new_len = 0;
return NULL;
}
return buffer;
}
/* Release everything deflate_open set up for @stream: the zlib state, the
 * input file descriptor and the decoder structure itself. Does nothing if
 * the stream has no decoder state. */
static void
deflate_close(struct stream_encoded *stream)
{
	struct deflate_enc_data *enc = (struct deflate_enc_data *) stream->data;

	if (!enc) return;

	inflateEnd(&enc->deflate_stream);
	close(enc->fdread);
	mem_free(enc);
	stream->data = NULL;
}
/* The "deflate" backend lists no file name extensions; the array holds
 * only its NULL terminator. */
static const unsigned char *const deflate_extensions[] = { NULL };
/* Backend descriptor for "deflate". It uses the same four functions as the
 * gzip backend in this file.
 * NOTE(review): the initializer is positional, so the order must match the
 * member order of struct decoding_backend, which is declared outside this
 * file - presumably name, extensions, open, read, decode_buffer, close. */
const struct decoding_backend deflate_decoding_backend = {
"deflate",
deflate_extensions,
deflate_open,
deflate_read,
deflate_decode_buffer,
deflate_close,
};
/* NULL-terminated list of file name extensions associated with gzip. */
static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
/* Backend descriptor for "gzip". It shares all four functions with the
 * deflate backend; deflate_open and deflate_decode_buffer initialize zlib
 * with MAX_WBITS | 32, so one implementation serves both names.
 * NOTE(review): positional initializer - must match the member order of
 * struct decoding_backend, which is declared outside this file. */
const struct decoding_backend gzip_decoding_backend = {
"gzip",
gzip_extensions,
deflate_open,
deflate_read,
deflate_decode_buffer,
deflate_close,
};

View File

@ -1,11 +1,13 @@
#ifndef EL__ENCODING_GZIP_H #ifndef EL__ENCODING_DEFLATE_H
#define EL__ENCODING_GZIP_H #define EL__ENCODING_DEFLATE_H
#include "encoding/encoding.h" #include "encoding/encoding.h"
#ifdef CONFIG_GZIP #ifdef CONFIG_GZIP
extern const struct decoding_backend deflate_decoding_backend;
extern const struct decoding_backend gzip_decoding_backend; extern const struct decoding_backend gzip_decoding_backend;
#else #else
#define deflate_decoding_backend dummy_decoding_backend
#define gzip_decoding_backend dummy_decoding_backend #define gzip_decoding_backend dummy_decoding_backend
#endif #endif

View File

@ -84,7 +84,7 @@ static const struct decoding_backend dummy_decoding_backend = {
/* Dynamic backend area */ /* Dynamic backend area */
#include "encoding/bzip2.h" #include "encoding/bzip2.h"
#include "encoding/gzip.h" #include "encoding/deflate.h"
#include "encoding/lzma.h" #include "encoding/lzma.h"
static const struct decoding_backend *const decoding_backends[] = { static const struct decoding_backend *const decoding_backends[] = {
@ -92,6 +92,7 @@ static const struct decoding_backend *const decoding_backends[] = {
&gzip_decoding_backend, &gzip_decoding_backend,
&bzip2_decoding_backend, &bzip2_decoding_backend,
&lzma_decoding_backend, &lzma_decoding_backend,
&deflate_decoding_backend,
}; };

View File

@ -9,6 +9,7 @@ enum stream_encoding {
ENCODING_GZIP, ENCODING_GZIP,
ENCODING_BZIP2, ENCODING_BZIP2,
ENCODING_LZMA, ENCODING_LZMA,
ENCODING_DEFLATE,
/* Max. number of known encoding including ENCODING_NONE. */ /* Max. number of known encoding including ENCODING_NONE. */
ENCODINGS_KNOWN, ENCODINGS_KNOWN,

View File

@ -1,241 +0,0 @@
/* Gzip encoding (ENCODING_GZIP) backend */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include "elinks.h"
#include "encoding/encoding.h"
#include "encoding/gzip.h"
#include "osdep/osdep.h"
#include "util/memory.h"
/* Wrap @fd in a zlib gzFile opened for binary reading and keep the handle
 * in stream->data. Returns 0 on success, -1 if gzdopen fails (stream->data
 * is NULL in that case). */
static int
gzip_open(struct stream_encoded *stream, int fd)
{
	void *handle = (void *) gzdopen(fd, "rb");

	stream->data = handle;
	return handle ? 0 : -1;
}
static int
gzip_read(struct stream_encoded *stream, unsigned char *data, int len)
{
gzclearerr((gzFile *) stream->data);
return gzread((gzFile *) stream->data, data, len);
}
/* The following code for decoding gzip in memory is a mix of code from zlib's
 * gzio.c file copyrighted 1995-2002 by Jean-loup Gailly and the customized
 * header extraction in the Linux kernel's lib/inflate.c file not copyrighted
 * 1992 by Mark Adler. */
/* The two bytes that open a gzip header, in order. skip_gzip_header
 * compares the first two input bytes against these. */
static int gzip_header_magic[2] = { 0x1f, 0x8b };
/* Bits of the gzip header flags byte, as tested by skip_gzip_header. */
enum gzip_header_flag {
GZIP_ASCII_TEXT = 0x01, /* File probably ascii text (unused) */
GZIP_HEADER_CRC = 0x02, /* Header CRC present */
GZIP_EXTRA_FIELD = 0x04, /* Extra field present */
GZIP_ORIG_NAME = 0x08, /* Original file name present */
GZIP_COMMENT = 0x10, /* File comment present */
GZIP_RESERVED = 0xE0, /* bits 5..7: reserved */
};
/* Take the next input byte from @stream, advancing next_in and decreasing
 * avail_in. Returns the byte value, or EOF (leaving the stream untouched)
 * when no input is left. */
static int
get_gzip_byte(z_stream *stream)
{
	int byte;

	if (!stream->avail_in)
		return EOF;

	byte = *stream->next_in;
	stream->next_in++;
	stream->avail_in--;
	return byte;
}
/* Discard @bytes input bytes from @stream. @bytes is evaluated once.
 * Running out of input is not reported: get_gzip_byte simply returns EOF
 * for the remaining iterations without consuming anything. */
#define skip_gzip_bytes(stream, bytes) \
do { int i = bytes; while (i-- > 0) get_gzip_byte(stream); } while (0)
/* Discard input bytes from @stream up to and including the next zero
 * byte, or until the input runs out, whichever comes first. */
#define skip_gzip_string(stream) \
do { int i; while ((i = get_gzip_byte(stream)) != 0 && i != EOF) ; } while (0)
/* Advance @stream past a gzip header, if there is one.
 *
 * Returns:
 *  - Z_OK after the header has been skipped, or when the input does not
 *    start with the gzip magic bytes but is not empty (in that case every
 *    byte examined has been put back);
 *  - Z_STREAM_END when the magic bytes are missing and no input is left;
 *  - Z_DATA_ERROR when the magic bytes are present but the method is not
 *    Z_DEFLATED or a reserved flag bit is set.
 *
 * Input that ends in the middle of the optional header fields is not
 * reported here; the skipping just stops at the end of the input. */
static int
skip_gzip_header(z_stream *stream)
{
	unsigned int pos;
	int method;	/* method byte */
	int flags;	/* flags byte */

	/* Check the gzip magic header */
	for (pos = 0; pos < 2; pos++) {
		int byte = get_gzip_byte(stream);
		unsigned int unread;

		if (byte == gzip_header_magic[pos])
			continue;

		/* Mismatch. Put back the magic byte that did match (only
		 * when this is the second position) plus the byte just
		 * read, unless that "byte" was really EOF. */
		unread = pos + (byte != EOF);
		stream->next_in -= unread;
		stream->avail_in += unread;

		return stream->avail_in ? Z_OK : Z_STREAM_END;
	}

	method = get_gzip_byte(stream);
	flags = get_gzip_byte(stream);

	if (method != Z_DEFLATED || (flags & GZIP_RESERVED))
		return Z_DATA_ERROR;

	/* Discard time, xflags and OS code: */
	skip_gzip_bytes(stream, 6);

	if (flags & GZIP_EXTRA_FIELD) {
		/* The extra field starts with its own length, low byte
		 * first. If EOF is hit while reading the length, the value
		 * is garbage, but the loop below stops at EOF anyway. */
		unsigned int extra = (unsigned int) get_gzip_byte(stream);

		extra += ((unsigned int) get_gzip_byte(stream)) << 8;

		while (extra > 0) {
			extra--;
			if (get_gzip_byte(stream) == EOF)
				break;
		}
	}

	/* Skip the original file name */
	if (flags & GZIP_ORIG_NAME)
		skip_gzip_string(stream);

	/* Skip the .gz file comment */
	if (flags & GZIP_COMMENT)
		skip_gzip_string(stream);

	/* Skip the header CRC */
	if (flags & GZIP_HEADER_CRC)
		skip_gzip_bytes(stream, 2);

	return Z_OK;
}
/* Decompress the @len bytes at @data into a newly allocated buffer.
 *
 * A gzip header, if present, is skipped by hand, and the rest is inflated
 * as a raw deflate stream (-MAX_WBITS). If zlib reports Z_DATA_ERROR, one
 * more attempt is made with MAX_WBITS, i.e. expecting a zlib header.
 *
 * On success returns the buffer and stores the number of decoded bytes in
 * *@new_len; this includes the case where the input ran out before the end
 * of the compressed stream. On failure returns NULL and stores 0 in
 * *@new_len.
 *
 * The original author noted: "Freaking dammit. This is impossible for me
 * to get working." */
static unsigned char *
gzip_decode_buffer(unsigned char *data, int len, int *new_len)
{
	unsigned char *buffer = NULL;
	unsigned long decoded = 0;
	int error = Z_OK;
	int tries, wbits;

	/* This WBITS loop thing was something I got from
	 * http://lists.infradead.org/pipermail/linux-mtd/2002-March/004429.html
	 * but it doesn't fix it. :/ --jonas */
	/* -MAX_WBITS implies -> suppress zlib header and adler32. try first
	 * with -MAX_WBITS, if that fails, try MAX_WBITS to be backwards
	 * compatible */
	wbits = -MAX_WBITS;

	for (tries = 0; tries < 2; tries++) {
		z_stream stream;

		memset(&stream, 0, sizeof(z_stream));

		/* FIXME: Use inflateInit2() to configure low memory
		 * usage for CONFIG_SMALL configurations. --jonas */
		error = inflateInit2(&stream, wbits);
		if (error != Z_OK) break;

		stream.next_in = data;
		stream.avail_in = len;

		error = skip_gzip_header(&stream);
		if (error != Z_OK) {
			/* No usable gzip header: feed zlib the input from
			 * the very beginning. */
			stream.next_in = data;
			stream.avail_in = len;
		}

		do {
			unsigned char *new_buffer;
			size_t size = stream.total_out + MAX_STR_LEN;

			assert(stream.next_in);

			/* Grow the output by one chunk per round. */
			new_buffer = mem_realloc(buffer, size);
			if (!new_buffer) {
				error = Z_MEM_ERROR;
				break;
			}

			buffer = new_buffer;
			stream.next_out = buffer + stream.total_out;
			stream.avail_out = MAX_STR_LEN;

			error = inflate(&stream, Z_NO_FLUSH);
			if (error == Z_STREAM_END) {
				/* Here gzio.c has some detection of
				 * concatenated .gz files and will do a gzip
				 * header skip and an inflateReset() call
				 * before continuing. It partly uses CRC to
				 * detect that. */
				error = Z_OK;
				break;
			}
		} while (error == Z_OK && stream.avail_in > 0);

		/* Remember the output size of this attempt before the
		 * stream is torn down, so that *new_len is also correct
		 * when the loop ended because the input ran out. */
		decoded = stream.total_out;
		inflateEnd(&stream);

		if (error != Z_DATA_ERROR)
			break;

		/* Try again with next wbits */
		wbits = -wbits;
	}

	if (error != Z_OK) {
		if (buffer) mem_free(buffer);
		*new_len = 0;
		return NULL;
	}

	*new_len = decoded;
	return buffer;
}
static void
gzip_close(struct stream_encoded *stream)
{
gzclose((gzFile *) stream->data);
}
/* NULL-terminated list of file name extensions associated with gzip. */
static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
/* Backend descriptor for "gzip", built on zlib's gzFile interface
 * (gzip_open / gzip_read / gzip_close) plus the in-memory decoder
 * gzip_decode_buffer.
 * NOTE(review): positional initializer - must match the member order of
 * struct decoding_backend, which is declared outside this file. */
const struct decoding_backend gzip_decoding_backend = {
"gzip",
gzip_extensions,
gzip_open,
gzip_read,
gzip_decode_buffer,
gzip_close,
};

View File

@ -759,7 +759,7 @@ http_send_header(struct socket *socket)
add_to_string(&header, ", "); add_to_string(&header, ", ");
#endif #endif
add_to_string(&header, "gzip"); add_to_string(&header, "deflate, gzip");
#endif #endif
add_crlf_to_string(&header); add_crlf_to_string(&header);
#endif #endif
@ -995,28 +995,23 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
int *new_len) int *new_len)
{ {
struct http_connection_info *http = conn->info; struct http_connection_info *http = conn->info;
/* to_read is number of bytes to be read from the decoder. It is 65536
* (then we are just emptying the decoder buffer as we finished the walk
* through the incoming stream already) or PIPE_BUF / 2 (when we are
* still walking through the stream - then we write PIPE_BUF / 2 to the
* pipe and read it back to the decoder ASAP; the point is that we can't
* write more than PIPE_BUF to the pipe at once, but we also have to
* never let read_encoded() (gzread(), in fact) to empty the pipe - that
* causes further malfunction of zlib :[ ... so we will make sure that
* we will always have at least PIPE_BUF / 2 + 1 in the pipe (returning
* early otherwise)). */
enum { NORMAL, FINISHING } state = NORMAL; enum { NORMAL, FINISHING } state = NORMAL;
int did_read = 0; int did_read = 0;
int *length_of_block; int *length_of_block;
unsigned char *output = NULL; unsigned char *output = NULL;
length_of_block = (http->length == LEN_CHUNKED ? &http->chunk_remaining
: &http->length);
#define BIG_READ 65536 #define BIG_READ 65536
if (!*length_of_block) {
/* Going to finish this decoding bussiness. */ if (http->length == LEN_CHUNKED) {
state = FINISHING; if (http->chunk_remaining == CHUNK_ZERO_SIZE)
state = FINISHING;
length_of_block = &http->chunk_remaining;
} else {
length_of_block = &http->length;
if (!*length_of_block) {
/* Going to finish this decoding bussiness. */
state = FINISHING;
}
} }
if (conn->content_encoding == ENCODING_NONE) { if (conn->content_encoding == ENCODING_NONE) {
@ -1035,16 +1030,9 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
} }
do { do {
/* The initial value is used only when state == NORMAL.
* Unconditional initialization avoids a GCC warning. */
int to_read = PIPE_BUF / 2;
if (state == NORMAL) { if (state == NORMAL) {
/* ... we aren't finishing yet. */ /* ... we aren't finishing yet. */
int written; int written = safe_write(conn->stream_pipes[1], data, len);
written = safe_write(conn->stream_pipes[1], data,
len > to_read ? to_read : len);
if (written > 0) { if (written > 0) {
data += written; data += written;
@ -1084,14 +1072,13 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
did_read = read_encoded(conn->stream, output + *new_len, BIG_READ); did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
if (did_read > 0) *new_len += did_read; if (did_read > 0) *new_len += did_read;
else if (did_read == -1) { else {
mem_free_set(&output, NULL); if (did_read < 0) state = FINISHING;
*new_len = 0; break;
break; /* Loop prevention (bug 517), is this correct ? --Zas */
} }
} while (len || did_read == BIG_READ); } while (len || (did_read == BIG_READ));
shutdown_connection_stream(conn); if (state == FINISHING) shutdown_connection_stream(conn);
return output; return output;
} }
@ -1218,11 +1205,8 @@ read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
} else { } else {
unsigned char *data; unsigned char *data;
int data_len; int data_len;
int len;
int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE); int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
int len = zero ? 0 : http->chunk_remaining;
if (zero) http->chunk_remaining = 0;
len = http->chunk_remaining;
/* Maybe everything necessary didn't come yet.. */ /* Maybe everything necessary didn't come yet.. */
int_upper_bound(&len, rb->length); int_upper_bound(&len, rb->length);
@ -1863,6 +1847,8 @@ again:
if (file_encoding != ENCODING_GZIP if (file_encoding != ENCODING_GZIP
&& (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip"))) && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
conn->content_encoding = ENCODING_GZIP; conn->content_encoding = ENCODING_GZIP;
if (!strcasecmp(d, "deflate") || !strcasecmp(d, "x-deflate"))
conn->content_encoding = ENCODING_DEFLATE;
#endif #endif
#ifdef CONFIG_BZIP2 #ifdef CONFIG_BZIP2
@ -1879,8 +1865,7 @@ again:
conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding)); conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
} }
if (http->length == -1 if (http->length == -1 || http->close)
|| (PRE_HTTP_1_1(http->recv_version) && http->close))
socket->state = SOCKET_END_ONCLOSE; socket->state = SOCKET_END_ONCLOSE;
read_http_data(socket, rb); read_http_data(socket, rb);