Replace the gzFile-based gzip backend with a shared zlib inflate backend.

src/encoding/gzip.c is deleted and src/encoding/deflate.c is added.  The
"gzip" backend and a new "deflate" backend (ENCODING_DEFLATE) both use the
same open/read/decode_buffer/close functions, which call inflateInit2()
with MAX_WBITS | 32 and inflate() directly.  The HTTP code advertises
"deflate, gzip", recognizes "deflate"/"x-deflate" content encodings,
writes the whole received block into the decoder pipe, and shuts the
decoder stream down only when it is finishing.

deflate_decode_buffer() stores the decoded length in *new_len on every
successful return and 0 on every failure, including the case where the
input runs out before Z_STREAM_END.

NOTE(review): the header names inside the "#include <...>" lines and the
exact tab/space indentation were reconstructed; confirm against the tree.
The "index" blob id for deflate.c predates the change to
deflate_decode_buffer().

diff --git a/src/encoding/Makefile b/src/encoding/Makefile
index d51b4166c..938497baa 100644
--- a/src/encoding/Makefile
+++ b/src/encoding/Makefile
@@ -2,7 +2,7 @@ top_builddir=../..
 include $(top_builddir)/Makefile.config
 
 OBJS-$(CONFIG_BZIP2)	+= bzip2.o
-OBJS-$(CONFIG_GZIP)	+= gzip.o
+OBJS-$(CONFIG_GZIP)	+= deflate.o
 OBJS-$(CONFIG_LZMA)	+= lzma.o LzmaDecode.o
 
 OBJS = encoding.o
diff --git a/src/encoding/deflate.c b/src/encoding/deflate.c
new file mode 100644
index 000000000..c211942f3
--- /dev/null
+++ b/src/encoding/deflate.c
@@ -0,0 +1,207 @@
+/* deflate/gzip encoding backend */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+#include <errno.h>
+
+#include "elinks.h"
+
+#include "encoding/deflate.h"
+#include "encoding/encoding.h"
+#include "util/memory.h"
+
+/* How many bytes of compressed data to read before decompressing. */
+#define ELINKS_DEFLATE_BUFFER_LENGTH 5000
+
+struct deflate_enc_data {
+	z_stream deflate_stream;
+
+	/* The file descriptor from which we read. */
+	int fdread;
+
+	unsigned int last_read:1;
+
+	/* A buffer for data that has been read from the file but not
+	 * yet decompressed. z_stream.next_in and z_stream.avail_in
+	 * refer to this buffer. */
+	unsigned char buf[ELINKS_DEFLATE_BUFFER_LENGTH];
+};
+
+static int
+deflate_open(struct stream_encoded *stream, int fd)
+{
+	/* A zero-initialized z_stream. The compiler ensures that all
+	 * pointer members in it are null. (Can't do this with memset
+	 * because C99 does not require all-bits-zero to be a null
+	 * pointer.) */
+	static const z_stream null_z_stream = {0};
+	int err;
+
+	struct deflate_enc_data *data = mem_alloc(sizeof(*data));
+
+	stream->data = NULL;
+	if (!data) {
+		return -1;
+	}
+
+	/* Initialize all members of *data, except data->buf[], which
+	 * will be initialized on demand by deflate_read. */
+	copy_struct(&data->deflate_stream, &null_z_stream);
+	data->fdread = fd;
+	data->last_read = 0;
+
+	err = inflateInit2(&data->deflate_stream, MAX_WBITS | 32);
+	if (err != Z_OK) {
+		mem_free(data);
+		return -1;
+	}
+	stream->data = data;
+
+	return 0;
+}
+
+static int
+deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
+{
+	struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
+	int err = 0;
+
+	if (!data) return -1;
+
+	assert(len > 0);
+
+	if (data->last_read) return 0;
+
+	data->deflate_stream.avail_out = len;
+	data->deflate_stream.next_out = buf;
+
+	do {
+		if (data->deflate_stream.avail_in == 0) {
+			int l = safe_read(data->fdread, data->buf,
+			                  ELINKS_DEFLATE_BUFFER_LENGTH);
+
+			if (l == -1) {
+				if (errno == EAGAIN)
+					break;
+				else
+					return -1; /* I/O error */
+			} else if (l == 0) {
+				/* EOF. It is error: we wait for more bytes */
+				return -1;
+			}
+
+			data->deflate_stream.next_in = data->buf;
+			data->deflate_stream.avail_in = l;
+		}
+		err = inflate(&data->deflate_stream, Z_SYNC_FLUSH);
+		if (err == Z_STREAM_END) {
+			data->last_read = 1;
+			break;
+		} else if (err != Z_OK) {
+			return -1;
+		}
+	} while (data->deflate_stream.avail_out > 0);
+
+	assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf);
+	return len - data->deflate_stream.avail_out;
+}
+
+/* Decompress a complete in-memory zlib or gzip stream.  Returns a
+ * buffer allocated with mem_realloc() and stores its length in
+ * *new_len; on failure returns NULL and stores 0 in *new_len. */
+static unsigned char *
+deflate_decode_buffer(unsigned char *data, int len, int *new_len)
+{
+	z_stream stream;
+	unsigned char *buffer = NULL;
+	int error;
+
+	*new_len = 0;	/* Stays 0 on every failure path. */
+
+	if (!len) return NULL;
+	memset(&stream, 0, sizeof(z_stream));
+	stream.next_in = data;
+	stream.avail_in = len;
+
+	if (inflateInit2(&stream, MAX_WBITS | 32) != Z_OK)
+		return NULL;
+
+	do {
+		unsigned char *new_buffer;
+		size_t size = stream.total_out + MAX_STR_LEN;
+
+		new_buffer = mem_realloc(buffer, size);
+		if (!new_buffer) {
+			error = Z_MEM_ERROR;
+			break;
+		}
+
+		buffer = new_buffer;
+		stream.next_out = buffer + stream.total_out;
+		stream.avail_out = MAX_STR_LEN;
+
+		error = inflate(&stream, Z_SYNC_FLUSH);
+		if (error == Z_STREAM_END) {
+			error = Z_OK;
+			break;
+		}
+	} while (error == Z_OK && stream.avail_in > 0);
+
+	inflateEnd(&stream);
+
+	if (error != Z_OK) {
+		if (buffer) mem_free(buffer);
+		return NULL;
+	}
+
+	/* The loop can also end with Z_OK once all input is consumed
+	 * but before Z_STREAM_END is seen, so the length must be
+	 * reported here rather than only on Z_STREAM_END. */
+	*new_len = stream.total_out;
+	return buffer;
+}
+
+static void
+deflate_close(struct stream_encoded *stream)
+{
+	struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
+
+	if (data) {
+		inflateEnd(&data->deflate_stream);
+		close(data->fdread);
+		mem_free(data);
+		stream->data = 0;
+	}
+}
+
+static const unsigned char *const deflate_extensions[] = { NULL };
+
+const struct decoding_backend deflate_decoding_backend = {
+	"deflate",
+	deflate_extensions,
+	deflate_open,
+	deflate_read,
+	deflate_decode_buffer,
+	deflate_close,
+};
+
+static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
+
+const struct decoding_backend gzip_decoding_backend = {
+	"gzip",
+	gzip_extensions,
+	deflate_open,
+	deflate_read,
+	deflate_decode_buffer,
+	deflate_close,
+};
diff --git a/src/encoding/gzip.h b/src/encoding/deflate.h
similarity index 50%
rename from src/encoding/gzip.h
rename to src/encoding/deflate.h
index c13038e33..4d3d2c3ae 100644
--- a/src/encoding/gzip.h
+++ b/src/encoding/deflate.h
@@ -1,11 +1,13 @@
-#ifndef EL__ENCODING_GZIP_H
-#define EL__ENCODING_GZIP_H
+#ifndef EL__ENCODING_DEFLATE_H
+#define EL__ENCODING_DEFLATE_H
 
 #include "encoding/encoding.h"
 
 #ifdef CONFIG_GZIP
+extern const struct decoding_backend deflate_decoding_backend;
 extern const struct decoding_backend gzip_decoding_backend;
 #else
+#define deflate_decoding_backend dummy_decoding_backend
 #define gzip_decoding_backend dummy_decoding_backend
 #endif
 
diff --git a/src/encoding/encoding.c b/src/encoding/encoding.c
index 42dc62b16..4d8c2d35b 100644
--- a/src/encoding/encoding.c
+++ b/src/encoding/encoding.c
@@ -84,7 +84,7 @@ static const struct decoding_backend dummy_decoding_backend = {
 /* Dynamic backend area */
 
 #include "encoding/bzip2.h"
-#include "encoding/gzip.h"
+#include "encoding/deflate.h"
 #include "encoding/lzma.h"
 
 static const struct decoding_backend *const decoding_backends[] = {
@@ -92,6 +92,7 @@ static const struct decoding_backend *const decoding_backends[] = {
 	&gzip_decoding_backend,
 	&bzip2_decoding_backend,
 	&lzma_decoding_backend,
+	&deflate_decoding_backend,
 };
 
 
diff --git a/src/encoding/encoding.h b/src/encoding/encoding.h
index 8b32c3b38..00b4ab2cf 100644
--- a/src/encoding/encoding.h
+++ b/src/encoding/encoding.h
@@ -9,6 +9,7 @@ enum stream_encoding {
 	ENCODING_GZIP,
 	ENCODING_BZIP2,
 	ENCODING_LZMA,
+	ENCODING_DEFLATE,
 
 	/* Max. number of known encoding including ENCODING_NONE. */
 	ENCODINGS_KNOWN,
diff --git a/src/encoding/gzip.c b/src/encoding/gzip.c
deleted file mode 100644
index a46800078..000000000
--- a/src/encoding/gzip.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/* Gzip encoding (ENCODING_GZIP) backend */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_ZLIB_H
-#include <zlib.h>
-#endif
-
-#include "elinks.h"
-
-#include "encoding/encoding.h"
-#include "encoding/gzip.h"
-#include "osdep/osdep.h"
-#include "util/memory.h"
-
-
-static int
-gzip_open(struct stream_encoded *stream, int fd)
-{
-	stream->data = (void *) gzdopen(fd, "rb");
-	if (!stream->data) return -1;
-
-	return 0;
-}
-
-static int
-gzip_read(struct stream_encoded *stream, unsigned char *data, int len)
-{
-	gzclearerr((gzFile *) stream->data);
-	return gzread((gzFile *) stream->data, data, len);
-}
-
-
-/* The following code for decoding gzip in memory is a mix of code from zlib's
- * gzio.c file copyrighted 1995-2002 by Jean-loup Gailly and the costumized
- * header extraction in the linux kernels lib/inflate.c file not copyrighted
- * 1992 by Mark Adler. */
-
-static int gzip_header_magic[2] = { 0x1f, 0x8b };
-
-enum gzip_header_flag {
-	GZIP_ASCII_TEXT	 = 0x01, /* File probably ascii text (unused) */
-	GZIP_HEADER_CRC	 = 0x02, /* Header CRC present */
-	GZIP_EXTRA_FIELD = 0x04, /* Extra field present */
-	GZIP_ORIG_NAME	 = 0x08, /* Original file name present */
-	GZIP_COMMENT	 = 0x10, /* File comment present */
-	GZIP_RESERVED	 = 0xE0, /* bits 5..7: reserved */
-};
-
-/* Read a byte from a gz_stream; update next_in and avail_in. Return EOF for
- * end of file. */
-static int
-get_gzip_byte(z_stream *stream)
-{
-	if (stream->avail_in == 0)
-		return EOF;
-
-	stream->avail_in--;
-
-	return *(stream->next_in)++;
-}
-
-#define skip_gzip_bytes(stream, bytes) \
-	do { int i = bytes; while (i-- > 0) get_gzip_byte(stream); } while (0)
-
-#define skip_gzip_string(stream) \
-	do { int i; while ((i = get_gzip_byte(stream)) != 0 && i != EOF) ; } while (0)
-
-/* Check the gzip header of a gz_stream opened for reading. Set the stream mode
- * to transparent if the gzip magic header is not present; set s->err to
- * Z_DATA_ERROR if the magic header is present but the rest of the header is
- * incorrect. */
-static int
-skip_gzip_header(z_stream *stream)
-{
-	unsigned int len;
-	int method; /* method byte */
-	int flags; /* flags byte */
-
-	/* Check the gzip magic header */
-	for (len = 0; len < 2; len++) {
-		int byte = get_gzip_byte(stream);
-
-		if (byte != gzip_header_magic[len]) {
-			if (len != 0) {
-				stream->avail_in++;
-				stream->next_in--;
-			}
-
-			if (byte != EOF) {
-				stream->avail_in++;
-				stream->next_in--;
-			}
-
-			return stream->avail_in != 0 ? Z_OK : Z_STREAM_END;
-		}
-	}
-
-	method = get_gzip_byte(stream);
-	flags = get_gzip_byte(stream);
-
-	if (method != Z_DEFLATED || (flags & GZIP_RESERVED) != 0)
-		return Z_DATA_ERROR;
-
-	/* Discard time, xflags and OS code: */
-	skip_gzip_bytes(stream, 6);
-
-	if (flags & GZIP_EXTRA_FIELD) {
-		/* Skip the extra field */
-		len  = (unsigned int) get_gzip_byte(stream);
-		len += ((unsigned int) get_gzip_byte(stream)) << 8;
-
-		/* If EOF is encountered @len is garbage, but the loop below
-		 * will quit anyway. */
-		while (len-- > 0 && get_gzip_byte(stream) != EOF) ;
-	}
-
-	/* Skip the original file name */
-	if (flags & GZIP_ORIG_NAME)
-		skip_gzip_string(stream);
-
-	/* Skip the .gz file comment */
-	if (flags & GZIP_COMMENT)
-		skip_gzip_string(stream);
-
-	/* Skip the header CRC */
-	if (flags & GZIP_HEADER_CRC)
-		skip_gzip_bytes(stream, 2);
-
-	return Z_OK;
-}
-
-
-/* Freaking dammit. This is impossible for me to get working. */
-static unsigned char *
-gzip_decode_buffer(unsigned char *data, int len, int *new_len)
-{
-	unsigned char *buffer = NULL;
-	int error = Z_OK;
-	int tries, wbits;
-
-	/* This WBITS loop thing was something I got from
-	 * http://lists.infradead.org/pipermail/linux-mtd/2002-March/004429.html
-	 * but it doesn't fix it. :/ --jonas */
-	/* -MAX_WBITS impiles -> suppress zlib header and adler32. try first
-	 * with -MAX_WBITS, if that fails, try MAX_WBITS to be backwards
-	 * compatible */
-	wbits = -MAX_WBITS;
-
-	for (tries = 0; tries < 2; tries++) {
-		z_stream stream;
-
-		memset(&stream, 0, sizeof(z_stream));
-
-		/* FIXME: Use inflateInit2() to configure low memory
-		 * usage for CONFIG_SMALL configurations. --jonas */
-		error = inflateInit2(&stream, wbits);
-		if (error != Z_OK) break;
-
-		stream.next_in = (char *)data;
-		stream.avail_in = len;
-
-		error = skip_gzip_header(&stream);
-		if (error != Z_OK) {
-			stream.next_in = (char *)data;
-			stream.avail_in = len;
-		}
-
-		do {
-			unsigned char *new_buffer;
-			size_t size = stream.total_out + MAX_STR_LEN;
-
-			assert(stream.total_out >= 0);
-			assert(stream.next_in);
-
-			new_buffer = mem_realloc(buffer, size);
-			if (!new_buffer) {
-				error = Z_MEM_ERROR;
-				break;
-			}
-
-			buffer		 = new_buffer;
-			stream.next_out  = buffer + stream.total_out;
-			stream.avail_out = MAX_STR_LEN;
-
-			error = inflate(&stream, Z_NO_FLUSH);
-			if (error == Z_STREAM_END) {
-				/* Here gzio.c has some detection of
-				 * concatenated .gz files and will do a gzip
-				 * header skip and an inflateReset() call
-				 * before continuing. It partly uses CRC to
-				 * detect that. */
-				*new_len = stream.total_out;
-				error = Z_OK;
-				break;
-			}
-
-		} while (error == Z_OK && stream.avail_in > 0);
-
-		inflateEnd(&stream);
-
-		if (error != Z_DATA_ERROR)
-			break;
-
-		/* Try again with next wbits */
-		wbits = -wbits;
-	}
-
-	if (error != Z_OK) {
-		if (buffer) mem_free(buffer);
-		*new_len = 0;
-		return NULL;
-	}
-
-	return buffer;
-}
-
-
-static void
-gzip_close(struct stream_encoded *stream)
-{
-	gzclose((gzFile *) stream->data);
-}
-
-static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
-
-const struct decoding_backend gzip_decoding_backend = {
-	"gzip",
-	gzip_extensions,
-	gzip_open,
-	gzip_read,
-	gzip_decode_buffer,
-	gzip_close,
-};
diff --git a/src/protocol/http/http.c b/src/protocol/http/http.c
index bf675121c..ab27239d1 100644
--- a/src/protocol/http/http.c
+++ b/src/protocol/http/http.c
@@ -759,7 +759,7 @@ http_send_header(struct socket *socket)
 	add_to_string(&header, ", ");
 #endif
 
-	add_to_string(&header, "gzip");
+	add_to_string(&header, "deflate, gzip");
 #endif
 	add_crlf_to_string(&header);
 #endif
@@ -995,28 +995,23 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
 		int *new_len)
 {
 	struct http_connection_info *http = conn->info;
-	/* to_read is number of bytes to be read from the decoder. It is 65536
-	 * (then we are just emptying the decoder buffer as we finished the walk
-	 * through the incoming stream already) or PIPE_BUF / 2 (when we are
-	 * still walking through the stream - then we write PIPE_BUF / 2 to the
-	 * pipe and read it back to the decoder ASAP; the point is that we can't
-	 * write more than PIPE_BUF to the pipe at once, but we also have to
-	 * never let read_encoded() (gzread(), in fact) to empty the pipe - that
-	 * causes further malfunction of zlib :[ ... so we will make sure that
-	 * we will always have at least PIPE_BUF / 2 + 1 in the pipe (returning
-	 * early otherwise)). */
 	enum { NORMAL, FINISHING } state = NORMAL;
 	int did_read = 0;
 	int *length_of_block;
 	unsigned char *output = NULL;
 
-	length_of_block = (http->length == LEN_CHUNKED ? &http->chunk_remaining
-						       : &http->length);
-
 #define BIG_READ 65536
-	if (!*length_of_block) {
-		/* Going to finish this decoding bussiness. */
-		state = FINISHING;
+
+	if (http->length == LEN_CHUNKED) {
+		if (http->chunk_remaining == CHUNK_ZERO_SIZE)
+			state = FINISHING;
+		length_of_block = &http->chunk_remaining;
+	} else {
+		length_of_block = &http->length;
+		if (!*length_of_block) {
+			/* Going to finish this decoding business. */
+			state = FINISHING;
+		}
 	}
 
 	if (conn->content_encoding == ENCODING_NONE) {
@@ -1035,16 +1030,9 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
 	}
 
 	do {
-		/* The initial value is used only when state == NORMAL.
-		 * Unconditional initialization avoids a GCC warning. */
-		int to_read = PIPE_BUF / 2;
-
 		if (state == NORMAL) {
 			/* ... we aren't finishing yet. */
-			int written;
-
-			written = safe_write(conn->stream_pipes[1], data,
-					     len > to_read ? to_read : len);
+			int written = safe_write(conn->stream_pipes[1], data, len);
 
 			if (written > 0) {
 				data += written;
@@ -1084,14 +1072,13 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
 		did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
 
 		if (did_read > 0) *new_len += did_read;
-		else if (did_read == -1) {
-			mem_free_set(&output, NULL);
-			*new_len = 0;
-			break; /* Loop prevention (bug 517), is this correct ? --Zas */
+		else {
+			if (did_read < 0) state = FINISHING;
+			break;
 		}
-	} while (len || did_read == BIG_READ);
+	} while (len || (did_read == BIG_READ));
 
-	shutdown_connection_stream(conn);
+	if (state == FINISHING) shutdown_connection_stream(conn);
 	return output;
 }
 
@@ -1218,11 +1205,8 @@ read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
 		} else {
 			unsigned char *data;
 			int data_len;
-			int len;
 			int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
-
-			if (zero) http->chunk_remaining = 0;
-			len = http->chunk_remaining;
+			int len = zero ? 0 : http->chunk_remaining;
 
 			/* Maybe everything necessary didn't come yet.. */
 			int_upper_bound(&len, rb->length);
@@ -1863,6 +1847,8 @@ again:
 			if (file_encoding != ENCODING_GZIP
 			    && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
 				conn->content_encoding = ENCODING_GZIP;
+			if (!strcasecmp(d, "deflate") || !strcasecmp(d, "x-deflate"))
+				conn->content_encoding = ENCODING_DEFLATE;
 #endif
 
 #ifdef CONFIG_BZIP2
@@ -1879,8 +1865,7 @@ again:
 		conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
 	}
 
-	if (http->length == -1
-	    || (PRE_HTTP_1_1(http->recv_version) && http->close))
+	if (http->length == -1 || http->close)
 		socket->state = SOCKET_END_ONCLOSE;
 
 	read_http_data(socket, rb);