diff --git a/AUTHORS b/AUTHORS index a1abcbe70..d9ae16a7d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -218,6 +218,9 @@ Hugo Haas Minor random hacking debian/watch file + + bzip2 decoding fix and rewrite + Ingo Blechschmidt German translation updates diff --git a/configure.in b/configure.in index a2e6f6c0f..20a87ec98 100644 --- a/configure.in +++ b/configure.in @@ -479,10 +479,6 @@ EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_GZIP, zlib, zlib.h, z, gzdopen, EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_BZIP2, bzlib, bzlib.h, bz2, BZ2_bzReadOpen, [ --without-bzlib disable bzlib support]) -EL_ARG_DEPEND(CONFIG_BZIP2_ENCODING, bzip2_encoding, [CONFIG_BZIP2:yes], - [Bzip2 content encoding], - [ --enable-bzip2-encoding enable Content-Encoding: bzip2 support (requires patched bzlib)]) - EL_CONFIG_OPTIONAL_LIBRARY(CONFIG_IDN, idn, idna.h, idn, stringprep_check_version, [ --without-idn disable international domain names support]) diff --git a/contrib/bzip2-pipe.patch b/contrib/bzip2-pipe.patch deleted file mode 100644 index 51ed8ba6f..000000000 --- a/contrib/bzip2-pipe.patch +++ /dev/null @@ -1,36 +0,0 @@ ---- bzip2-1.0.3/bzlib.c.old 2006-12-15 11:00:53.000000000 +0100 -+++ bzip2-1.0.3/bzlib.c 2006-12-15 11:10:57.000000000 +0100 -@@ -1209,6 +1209,7 @@ - int len ) - { - Int32 n, ret; -+ Int32 pi = 0; - bzFile* bzf = (bzFile*)b; - - BZ_SETERR(BZ_OK); -@@ -1233,8 +1234,10 @@ - if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { - n = fread ( bzf->buf, sizeof(UChar), - BZ_MAX_UNUSED, bzf->handle ); -- if (ferror(bzf->handle)) -- { BZ_SETERR(BZ_IO_ERROR); return 0; }; -+ if (ferror(bzf->handle)) { -+ if (n >= 0) pi = 1; -+ else { BZ_SETERR(BZ_IO_ERROR); return 0; } -+ } - bzf->bufN = n; - bzf->strm.avail_in = bzf->bufN; - bzf->strm.next_in = bzf->buf; -@@ -1246,8 +1249,10 @@ - { BZ_SETERR(ret); return 0; }; - - if (ret == BZ_OK && myfeof(bzf->handle) && -- bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) -- { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; -+ bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) { -+ if (!pi) { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; } -+ else return len - bzf->strm.avail_out; -+ } - - if (ret == BZ_STREAM_END) - { BZ_SETERR(BZ_STREAM_END); diff --git a/contrib/bzip2-pipe.patch.README b/contrib/bzip2-pipe.patch.README deleted file mode 100644 index 3c8959137..000000000 --- a/contrib/bzip2-pipe.patch.README +++ /dev/null @@ -1,4 +0,0 @@ -With the bzip2-pipe.patch Content-Encoding: bzip2 works with ELinks. -The patched bzlib wasn't heavilly tested in other situations. -bzerror may be set to other error code rather than BZ_IO_ERROR -in some cases, but IMHO it is safe to use this patch. --witekfl diff --git a/src/encoding/bzip2.c b/src/encoding/bzip2.c index 3f8df409b..023a70f59 100644 --- a/src/encoding/bzip2.c +++ b/src/encoding/bzip2.c @@ -12,6 +12,7 @@ #ifdef HAVE_BZLIB_H #include /* Everything needs this after stdio.h */ #endif +#include #include "elinks.h" @@ -19,13 +20,14 @@ #include "encoding/encoding.h" #include "util/memory.h" -struct bz2_enc_data { - FILE *file; - BZFILE *bzfile; - int last_read; /* If err after last bzRead() was BZ_STREAM_END.. */ -}; +#define ELINKS_BZ_BUFFER_LENGTH BZ_MAX_UNUSED -/* TODO: When it'll be official, use bzdopen() from Yoshioka Tsuneo. --pasky */ +struct bz2_enc_data { + unsigned char buf[ELINKS_BZ_BUFFER_LENGTH]; + bz_stream fbz_stream; + int fdread; + int last_read; /* If err after last bzDecompress was BZ_STREAM_END.. */ +}; static int bzip2_open(struct stream_encoded *stream, int fd) @@ -33,15 +35,17 @@ bzip2_open(struct stream_encoded *stream, int fd) struct bz2_enc_data *data = mem_alloc(sizeof(*data)); int err; + stream->data = 0; if (!data) { return -1; } + memset(data, 0, sizeof(struct bz2_enc_data) - ELINKS_BZ_BUFFER_LENGTH); + data->last_read = 0; - - data->file = fdopen(fd, "rb"); - - data->bzfile = BZ2_bzReadOpen(&err, data->file, 0, 0, NULL, 0); - if (!data->bzfile) { + data->fdread = fd; + + err = BZ2_bzDecompressInit(&data->fbz_stream, 0, 0); + if (err != BZ_OK) { mem_free(data); return -1; } @@ -57,18 +61,44 @@ bzip2_read(struct stream_encoded *stream, unsigned char *buf, int len) struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data; int err = 0; - if (data->last_read) - return 0; + if (!data) return -1; - clearerr(data->file); - len = BZ2_bzRead(&err, data->bzfile, buf, len); + assert(len > 0); - if (err == BZ_STREAM_END) - data->last_read = 1; - else if (err) - return -1; + if (data->last_read) return 0; - return len; + data->fbz_stream.avail_out = len; + data->fbz_stream.next_out = buf; + + do { + if (data->fbz_stream.avail_in == 0) { + int l = safe_read(data->fdread, data->buf, + ELINKS_BZ_BUFFER_LENGTH); + + if (l == -1) { + if (errno == EAGAIN) + break; + else + return -1; /* I/O error */ + } else if (l == 0) { + /* EOF. It is error: we wait for more bytes */ + return -1; + } + + data->fbz_stream.next_in = data->buf; + data->fbz_stream.avail_in = l; + } + + err = BZ2_bzDecompress(&data->fbz_stream); + if (err == BZ_STREAM_END) { + data->last_read = 1; + break; + } else if (err != BZ_OK) { + return -1; + } + } while (data->fbz_stream.avail_out > 0); + + return len - data->fbz_stream.avail_out; } static unsigned char * @@ -148,11 +178,13 @@ static void bzip2_close(struct stream_encoded *stream) { struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data; - int err; - BZ2_bzReadClose(&err, data->bzfile); - fclose(data->file); - mem_free(data); + if (data) { + BZ2_bzDecompressEnd(&data->fbz_stream); + close(data->fdread); + mem_free(data); + stream->data = 0; + } } static unsigned char *bzip2_extensions[] = { ".bz2", ".tbz", NULL }; diff --git a/src/protocol/http/http.c b/src/protocol/http/http.c index e58a2fe58..3d116121b 100644 --- a/src/protocol/http/http.c +++ b/src/protocol/http/http.c @@ -719,16 +719,16 @@ http_send_header(struct socket *socket) add_crlf_to_string(&header); /* TODO: Make this encoding.c function. */ -#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2_ENCODING) +#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) add_to_string(&header, "Accept-Encoding: "); -#ifdef CONFIG_BZIP2_ENCODING +#ifdef CONFIG_BZIP2 add_to_string(&header, "bzip2"); #endif #ifdef CONFIG_GZIP -#ifdef CONFIG_BZIP2_ENCODING +#ifdef CONFIG_BZIP2 add_to_string(&header, ", "); #endif @@ -1797,7 +1797,7 @@ again: conn->content_encoding = ENCODING_GZIP; #endif -#ifdef CONFIG_BZIP2_ENCODING +#ifdef CONFIG_BZIP2 if (file_encoding != ENCODING_BZIP2 && (!strcasecmp(d, "bzip2") || !strcasecmp(d, "x-bzip2"))) conn->content_encoding = ENCODING_BZIP2;