1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

bug 1083: Distinguish EOF from errors in read_encoded

The 2009-08-15 fix for bug 1083 made read_encoded() return -1 on EOF,
like decompress_data() expects.  Unfortunately, read_file() too calls
read_encoded(), and it treated the -1 as an error and reported
whatever error had been left in errno.  This made it impossible to
display local files, compressed or not.

Apparently then, read_encoded() needs to distinguish between decoded
bytes, EOF, EAGAIN, and true errors.  Make it return an enum.
This commit is contained in:
Kalle Olavi Niemitalo 2009-08-19 00:18:20 +03:00 committed by Kalle Olavi Niemitalo
parent 972afa1c26
commit 9079106894
6 changed files with 255 additions and 93 deletions

View File

@ -31,12 +31,11 @@ struct bz2_enc_data {
/* The file descriptor from which we read. */ /* The file descriptor from which we read. */
int fdread; int fdread;
/* Initially 0; set to 1 when BZ2_bzDecompress indicates /** Error code to be returned by all later bzip2_read() calls.
* BZ_STREAM_END, which means it has found the bzip2-specific * ::READENC_EAGAIN is used here as a passive value that means
* end-of-stream marker and all data has been decompressed. * no such error occurred yet. ::READENC_ERRNO is not allowed
* Then we neither read from the file nor call BZ2_bzDecompress * because there is no @c sticky_errno member here. */
* any more. */ enum read_encoded_result sticky_result;
int last_read;
/* A buffer for data that has been read from the file but not /* A buffer for data that has been read from the file but not
* yet decompressed. fbz_stream.next_in and fbz_stream.avail_in * yet decompressed. fbz_stream.next_in and fbz_stream.avail_in
@ -65,7 +64,7 @@ bzip2_open(struct stream_encoded *stream, int fd)
* will be initialized on demand by bzip2_read. */ * will be initialized on demand by bzip2_read. */
copy_struct(&data->fbz_stream, &null_bz_stream); copy_struct(&data->fbz_stream, &null_bz_stream);
data->fdread = fd; data->fdread = fd;
data->last_read = 0; data->sticky_result = READENC_EAGAIN;
err = BZ2_bzDecompressInit(&data->fbz_stream, 0, 0); err = BZ2_bzDecompressInit(&data->fbz_stream, 0, 0);
if (err != BZ_OK) { if (err != BZ_OK) {
@ -78,34 +77,64 @@ bzip2_open(struct stream_encoded *stream, int fd)
return 0; return 0;
} }
static enum read_encoded_result
map_bzip2_ret(int ret)
{
switch (ret) {
case BZ_STREAM_END:
return READENC_STREAM_END;
case BZ_DATA_ERROR:
case BZ_DATA_ERROR_MAGIC:
return READENC_DATA_ERROR;
case BZ_UNEXPECTED_EOF:
return READENC_UNEXPECTED_EOF;
case BZ_IO_ERROR:
return READENC_ERRNO;
case BZ_MEM_ERROR:
return READENC_MEM_ERROR;
case BZ_RUN_OK: /* not possible in decompression */
case BZ_FLUSH_OK: /* likewise */
case BZ_FINISH_OK: /* likewise */
case BZ_OUTBUFF_FULL: /* only for BuffToBuff functions */
case BZ_CONFIG_ERROR:
case BZ_SEQUENCE_ERROR:
case BZ_PARAM_ERROR:
default:
return READENC_INTERNAL;
}
}
static int static int
bzip2_read(struct stream_encoded *stream, unsigned char *buf, int len) bzip2_read(struct stream_encoded *stream, unsigned char *buf, int len)
{ {
struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data; struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data;
int err = 0; int err = 0;
int l = 0;
if (!data) return -1; if (!data) return READENC_INTERNAL;
assert(len > 0); assert(len > 0);
if_assert_failed return READENC_INTERNAL;
if (data->last_read) return -1; if (data->sticky_result != READENC_EAGAIN)
return data->sticky_result;
data->fbz_stream.avail_out = len; data->fbz_stream.avail_out = len;
data->fbz_stream.next_out = buf; data->fbz_stream.next_out = buf;
do { do {
if (data->fbz_stream.avail_in == 0) { if (data->fbz_stream.avail_in == 0) {
int l = safe_read(data->fdread, data->buf, l = safe_read(data->fdread, data->buf,
ELINKS_BZ_BUFFER_LENGTH); ELINKS_BZ_BUFFER_LENGTH);
if (l == -1) { if (l == -1) {
if (errno == EAGAIN) if (errno == EAGAIN)
break; break;
else else
return -1; /* I/O error */ return READENC_ERRNO; /* I/O error */
} else if (l == 0) { } else if (l == 0) {
/* EOF. It is error: we wait for more bytes */ /* EOF. It is error: we wait for more bytes */
return -1; return READENC_UNEXPECTED_EOF;
} }
data->fbz_stream.next_in = data->buf; data->fbz_stream.next_in = data->buf;
@ -114,15 +143,19 @@ bzip2_read(struct stream_encoded *stream, unsigned char *buf, int len)
err = BZ2_bzDecompress(&data->fbz_stream); err = BZ2_bzDecompress(&data->fbz_stream);
if (err == BZ_STREAM_END) { if (err == BZ_STREAM_END) {
data->last_read = 1; data->sticky_result = READENC_STREAM_END;
break; break;
} else if (err != BZ_OK) { } else if (err != BZ_OK) {
return -1; return map_bzip2_ret(err);
} }
} while (data->fbz_stream.avail_out > 0); } while (data->fbz_stream.avail_out > 0);
assert(len - data->fbz_stream.avail_out == data->fbz_stream.next_out - (char *) buf); l = len - data->fbz_stream.avail_out;
return len - data->fbz_stream.avail_out; assert(l == data->fbz_stream.next_out - (char *) buf);
if (l > 0) /* Positive return values are byte counts */
return l;
else /* and others are from enum read_encoded_result */
return data->sticky_result;
} }
#ifdef CONFIG_SMALL #ifdef CONFIG_SMALL

View File

@ -29,9 +29,18 @@ struct deflate_enc_data {
/* The file descriptor from which we read. */ /* The file descriptor from which we read. */
int fdread; int fdread;
unsigned int last_read:1;
unsigned int after_first_read:1; unsigned int after_first_read:1;
/** Error code to be returned by all later deflate_read()
* calls. ::READENC_EAGAIN is used here as a passive value
* that means no such error occurred yet. */
enum read_encoded_result sticky_result;
/** Error code to be set to @c errno by all later
* deflate_read() calls. This is interesting only when
* #sticky_result == ::READENC_ERRNO. */
int sticky_errno;
/* A buffer for data that has been read from the file but not /* A buffer for data that has been read from the file but not
* yet decompressed. z_stream.next_in and z_stream.avail_in * yet decompressed. z_stream.next_in and z_stream.avail_in
* refer to this buffer. */ * refer to this buffer. */
@ -59,8 +68,9 @@ deflate_open(int window_size, struct stream_encoded *stream, int fd)
* will be initialized on demand by deflate_read. */ * will be initialized on demand by deflate_read. */
copy_struct(&data->deflate_stream, &null_z_stream); copy_struct(&data->deflate_stream, &null_z_stream);
data->fdread = fd; data->fdread = fd;
data->last_read = 0;
data->after_first_read = 0; data->after_first_read = 0;
data->sticky_result = READENC_EAGAIN;
data->sticky_errno = 0;
err = inflateInit2(&data->deflate_stream, window_size); err = inflateInit2(&data->deflate_stream, window_size);
if (err != Z_OK) { if (err != Z_OK) {
@ -88,6 +98,36 @@ deflate_gzip_open(struct stream_encoded *stream, int fd)
return deflate_open(MAX_WBITS + 32, stream, fd); return deflate_open(MAX_WBITS + 32, stream, fd);
} }
static void
deflate_set_sticky(struct deflate_enc_data *data, int ret, int save_errno)
{
switch (ret) {
case Z_STREAM_END:
data->sticky_result = READENC_STREAM_END;
break;
case Z_DATA_ERROR:
case Z_NEED_DICT:
data->sticky_result = READENC_DATA_ERROR;
break;
case Z_ERRNO:
data->sticky_result = READENC_ERRNO;
data->sticky_errno = save_errno;
break;
case Z_MEM_ERROR:
data->sticky_result = READENC_MEM_ERROR;
break;
case Z_STREAM_ERROR:
case Z_BUF_ERROR:
case Z_VERSION_ERROR:
default:
data->sticky_result = READENC_INTERNAL;
break;
}
}
/*! @return A positive number means that many bytes were
* written to the @a buf array. Otherwise, the value is
* enum read_encoded_result. */
static int static int
deflate_read(struct stream_encoded *stream, unsigned char *buf, int len) deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
{ {
@ -95,11 +135,16 @@ deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
int err = 0; int err = 0;
int l = 0; int l = 0;
if (!data) return -1; if (!data) return READENC_INTERNAL;
assert(len > 0); assert(len > 0);
if_assert_failed return READENC_INTERNAL;
if (data->last_read) return -1; if (data->sticky_result != READENC_EAGAIN) {
if (data->sticky_result == READENC_ERRNO)
errno = data->sticky_errno;
return data->sticky_result;
}
data->deflate_stream.avail_out = len; data->deflate_stream.avail_out = len;
data->deflate_stream.next_out = buf; data->deflate_stream.next_out = buf;
@ -107,16 +152,16 @@ deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
do { do {
if (data->deflate_stream.avail_in == 0) { if (data->deflate_stream.avail_in == 0) {
l = safe_read(data->fdread, data->buf, l = safe_read(data->fdread, data->buf,
ELINKS_DEFLATE_BUFFER_LENGTH); ELINKS_DEFLATE_BUFFER_LENGTH);
if (l == -1) { if (l == -1) {
if (errno == EAGAIN) if (errno == EAGAIN)
break; break;
else else
return -1; /* I/O error */ return READENC_ERRNO; /* I/O error */
} else if (l == 0) { } else if (l == 0) {
/* EOF. It is error: we wait for more bytes */ /* EOF. It is error: we wait for more bytes */
return -1; return READENC_UNEXPECTED_EOF;
} }
data->deflate_stream.next_in = data->buf; data->deflate_stream.next_in = data->buf;
@ -156,17 +201,21 @@ restart:
if (err == Z_OK) goto restart; if (err == Z_OK) goto restart;
} }
data->after_first_read = 1; data->after_first_read = 1;
if (err == Z_STREAM_END) { if (err != Z_OK) {
data->last_read = 1; deflate_set_sticky(data, err, errno);
break;
} else if (err != Z_OK) {
data->last_read = 1;
break; break;
} }
} while (data->deflate_stream.avail_out > 0); } while (data->deflate_stream.avail_out > 0);
assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf); l = len - data->deflate_stream.avail_out;
return len - data->deflate_stream.avail_out; assert(l == data->deflate_stream.next_out - buf);
if (l > 0) /* Positive return values are byte counts */
return l;
else { /* and others are from enum read_encoded_result */
if (data->sticky_result == READENC_ERRNO)
errno = data->sticky_errno;
return data->sticky_result;
}
} }
static unsigned char * static unsigned char *

View File

@ -45,6 +45,9 @@ dummy_open(struct stream_encoded *stream, int fd)
return 0; return 0;
} }
/*! @return A positive number means that many bytes were
* written to the @a data array. Otherwise, the value is
* enum read_encoded_result. */
static int static int
dummy_read(struct stream_encoded *stream, unsigned char *data, int len) dummy_read(struct stream_encoded *stream, unsigned char *data, int len)
{ {
@ -53,10 +56,12 @@ dummy_read(struct stream_encoded *stream, unsigned char *data, int len)
if (got > 0) if (got > 0)
return got; return got;
else if (got == -1 && errno == EAGAIN) else if (got == 0)
return 0; return READENC_STREAM_END;
else if (errno == EAGAIN)
return READENC_EAGAIN;
else else
return -1; return READENC_ERRNO;
} }
static unsigned char * static unsigned char *
@ -131,11 +136,8 @@ open_encoded(int fd, enum stream_encoding encoding)
* size of _returned_ data, not desired size of data read from * size of _returned_ data, not desired size of data read from
* stream. * stream.
* *
* @return the number of bytes written to the @a data array if * @return A positive number means that many bytes were written to the
* something was decoded; 0 if no data is available yet but some may * @a data array. Otherwise, the value is enum read_encoded_result. */
* become available later; or -1 if there will be no further data,
* either because an error occurred or because an end-of-stream mark
* was reached. */
int int
read_encoded(struct stream_encoded *stream, unsigned char *data, int len) read_encoded(struct stream_encoded *stream, unsigned char *data, int len)
{ {
@ -241,6 +243,9 @@ try_encoding_extensions(struct string *filename, int *fd)
struct connection_state struct connection_state
read_file(struct stream_encoded *stream, int readsize, struct string *page) read_file(struct stream_encoded *stream, int readsize, struct string *page)
{ {
int readlen;
int save_errno;
if (!init_string(page)) return connection_state(S_OUT_OF_MEM); if (!init_string(page)) return connection_state(S_OUT_OF_MEM);
/* We read with granularity of stt.st_size (given as @readsize) - this /* We read with granularity of stt.st_size (given as @readsize) - this
@ -252,46 +257,55 @@ read_file(struct stream_encoded *stream, int readsize, struct string *page)
* allocate zero number of bytes. */ * allocate zero number of bytes. */
if (!readsize) readsize = 4096; if (!readsize) readsize = 4096;
while (realloc_string(page, page->length + readsize)) { for (;;) {
unsigned char *string_pos = page->source + page->length; unsigned char *string_pos;
int readlen = read_encoded(stream, string_pos, readsize);
if (readlen < 0) { if (!realloc_string(page, page->length + readsize)) {
done_string(page); done_string(page);
return connection_state(S_OUT_OF_MEM);
}
/* If it is some I/O error (and errno is set) that will string_pos = page->source + page->length;
* do. Since errno == 0 == S_WAIT and we cannot have readlen = read_encoded(stream, string_pos, readsize);
* that. */ if (readlen <= 0) {
if (errno) save_errno = errno; /* in case of READENC_ERRNO */
return connection_state_for_errno(errno); break;
/* FIXME: This is indeed an internal error. If readed from a
* corrupted encoded file nothing or only some of the
* data will be read. */
return connection_state(S_ENCODE_ERROR);
} else if (readlen == 0) {
/* NUL-terminate just in case */
page->source[page->length] = '\0';
return connection_state(S_OK);
} }
page->length += readlen; page->length += readlen;
#if 0
/* This didn't work so well as it should (I had to implement
* end of stream handling to bzip2 anyway), so I rather
* disabled this. */
if (readlen < readsize) {
/* This is much safer. It should always mean that we
* already read everything possible, and it permits us
* more elegant of handling end of file with bzip2. */
break;
}
#endif
} }
done_string(page); switch (readlen) {
return connection_state(S_OUT_OF_MEM); case READENC_ERRNO:
done_string(page);
return connection_state_for_errno(save_errno);
case READENC_STREAM_END:
/* NUL-terminate just in case */
page->source[page->length] = '\0';
return connection_state(S_OK);
case READENC_UNEXPECTED_EOF:
case READENC_DATA_ERROR:
done_string(page);
/* FIXME: This is indeed an internal error. If readed from a
* corrupted encoded file nothing or only some of the
* data will be read. */
return connection_state(S_ENCODE_ERROR);
case READENC_MEM_ERROR:
done_string(page);
return connection_state(S_OUT_OF_MEM);
case READENC_EAGAIN:
case READENC_INTERNAL:
default:
ERROR("unexpected readlen==%d", readlen);
/* If you have a breakpoint in elinks_error(),
* you can examine page before it gets freed. */
done_string(page);
return connection_state(S_INTERNAL);
}
} }
static inline int static inline int

View File

@ -15,6 +15,35 @@ enum stream_encoding {
ENCODINGS_KNOWN, ENCODINGS_KNOWN,
}; };
/** Non-positive status codes shared by read_encoded() and
 * decoding_backend.read.
 *
 * A positive return value from those functions is a byte count, so
 * no enumerator here may be positive.  Zero would be allowed but is
 * currently not assigned to anything.  The numeric order of the
 * enumerators carries no meaning; do not rely on it. */
enum read_encoded_result {
	/** Failure whose cause is stored in @c errno. */
	READENC_ERRNO = -1,

	/** The compressed data contained an end-of-file mark and it
	 * has been reached. */
	READENC_STREAM_END = -2,

	/** The input ran out before the decompressor expected it to. */
	READENC_UNEXPECTED_EOF = -3,

	/** Nothing can be decompressed yet; more input is needed. */
	READENC_EAGAIN = -4,

	/** Malformed input, for example a checksum mismatch or a
	 * missing header. */
	READENC_DATA_ERROR = -5,

	/** Memory allocation failed. */
	READENC_MEM_ERROR = -6,

	/** Internal error. */
	READENC_INTERNAL = -7
};
struct stream_encoded { struct stream_encoded {
enum stream_encoding encoding; enum stream_encoding encoding;
void *data; void *data;
@ -23,9 +52,16 @@ struct stream_encoded {
struct decoding_backend { struct decoding_backend {
const unsigned char *name; const unsigned char *name;
const unsigned char *const *extensions; const unsigned char *const *extensions;
int (*open)(struct stream_encoded *stream, int fd); int (*open)(struct stream_encoded *stream, int fd);
/*! @return A positive number means that many bytes were
* written to the @a data array. Otherwise, the value is
* enum read_encoded_result. */
int (*read)(struct stream_encoded *stream, unsigned char *data, int len); int (*read)(struct stream_encoded *stream, unsigned char *data, int len);
unsigned char *(*decode_buffer)(unsigned char *data, int len, int *new_len); unsigned char *(*decode_buffer)(unsigned char *data, int len, int *new_len);
void (*close)(struct stream_encoded *stream); void (*close)(struct stream_encoded *stream);
}; };

View File

@ -25,7 +25,13 @@
struct lzma_enc_data { struct lzma_enc_data {
lzma_stream flzma_stream; lzma_stream flzma_stream;
int fdread; int fdread;
int last_read;
/** Error code to be returned by all later lzma_read() calls.
* ::READENC_EAGAIN is used here as a passive value that means
* no such error occurred yet. ::READENC_ERRNO is not allowed
* because there is no @c sticky_errno member here. */
enum read_encoded_result sticky_result;
unsigned char buf[ELINKS_BZ_BUFFER_LENGTH]; unsigned char buf[ELINKS_BZ_BUFFER_LENGTH];
}; };
@ -42,7 +48,7 @@ lzma_open(struct stream_encoded *stream, int fd)
copy_struct(&data->flzma_stream, &LZMA_STREAM_INIT_VAR); copy_struct(&data->flzma_stream, &LZMA_STREAM_INIT_VAR);
data->fdread = fd; data->fdread = fd;
data->last_read = 0; data->sticky_result = READENC_EAGAIN;
err = lzma_auto_decoder(&data->flzma_stream, NULL, NULL); err = lzma_auto_decoder(&data->flzma_stream, NULL, NULL);
if (err != LZMA_OK) { if (err != LZMA_OK) {
@ -55,34 +61,58 @@ lzma_open(struct stream_encoded *stream, int fd)
return 0; return 0;
} }
/** Translate a liblzma status code into an enum read_encoded_result.
 *
 * @param ret  An lzma_ret value, typically returned by lzma_code().
 *
 * @return ::READENC_STREAM_END for LZMA_STREAM_END,
 * ::READENC_DATA_ERROR for LZMA_DATA_ERROR and LZMA_HEADER_ERROR,
 * ::READENC_MEM_ERROR for LZMA_MEM_ERROR,
 * and ::READENC_INTERNAL for every other value, including
 * LZMA_PROG_ERROR and LZMA_BUF_ERROR. */
static enum read_encoded_result
map_lzma_ret(lzma_ret ret)
{
	/* Anything without a dedicated mapping is an internal error. */
	enum read_encoded_result mapped = READENC_INTERNAL;

	if (ret == LZMA_STREAM_END)
		mapped = READENC_STREAM_END;
	else if (ret == LZMA_DATA_ERROR || ret == LZMA_HEADER_ERROR)
		mapped = READENC_DATA_ERROR;
	else if (ret == LZMA_MEM_ERROR)
		mapped = READENC_MEM_ERROR;

	return mapped;
}
/*! @return A positive number means that many bytes were
* written to the @a buf array. Otherwise, the value is
* enum read_encoded_result. */
static int static int
lzma_read(struct stream_encoded *stream, unsigned char *buf, int len) lzma_read(struct stream_encoded *stream, unsigned char *buf, int len)
{ {
struct lzma_enc_data *data = (struct lzma_enc_data *) stream->data; struct lzma_enc_data *data = (struct lzma_enc_data *) stream->data;
int err = 0; int err = 0;
int l = 0;
if (!data) return -1; if (!data) return READENC_INTERNAL;
assert(len > 0); assert(len > 0);
if_assert_failed return READENC_INTERNAL;
if (data->last_read) return -1; if (data->sticky_result != READENC_EAGAIN)
return data->sticky_result;
data->flzma_stream.avail_out = len; data->flzma_stream.avail_out = len;
data->flzma_stream.next_out = buf; data->flzma_stream.next_out = buf;
do { do {
if (data->flzma_stream.avail_in == 0) { if (data->flzma_stream.avail_in == 0) {
int l = safe_read(data->fdread, data->buf, l = safe_read(data->fdread, data->buf,
ELINKS_BZ_BUFFER_LENGTH); ELINKS_BZ_BUFFER_LENGTH);
if (l == -1) { if (l == -1) {
if (errno == EAGAIN) if (errno == EAGAIN)
break; break;
else else
return -1; /* I/O error */ return READENC_ERRNO; /* I/O error */
} else if (l == 0) { } else if (l == 0) {
/* EOF. It is error: we wait for more bytes */ /* EOF. It is error: we wait for more bytes */
return -1; return READENC_UNEXPECTED_EOF;
} }
data->flzma_stream.next_in = data->buf; data->flzma_stream.next_in = data->buf;
@ -91,15 +121,19 @@ lzma_read(struct stream_encoded *stream, unsigned char *buf, int len)
err = lzma_code(&data->flzma_stream, LZMA_RUN); err = lzma_code(&data->flzma_stream, LZMA_RUN);
if (err == LZMA_STREAM_END) { if (err == LZMA_STREAM_END) {
data->last_read = 1; data->sticky_result = READENC_STREAM_END;
break; break;
} else if (err != LZMA_OK) { } else if (err != LZMA_OK && err != LZMA_UNSUPPORTED_CHECK) {
return -1; return map_lzma_ret(err);
} }
} while (data->flzma_stream.avail_out > 0); } while (data->flzma_stream.avail_out > 0);
assert(len - data->flzma_stream.avail_out == data->flzma_stream.next_out - buf); l = len - data->flzma_stream.avail_out;
return len - data->flzma_stream.avail_out; assert(l == data->flzma_stream.next_out - buf);
if (l > 0) /* Positive return values are byte counts */
return l;
else /* and others are from enum read_encoded_result */
return data->sticky_result;
} }
static unsigned char * static unsigned char *

View File

@ -1121,16 +1121,12 @@ decompress_data(struct connection *conn, unsigned char *data, int len,
did_read = read_encoded(conn->stream, output + *new_len, BIG_READ); did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
/* Do not break from the loop if did_read == 0. It if (did_read > 0)
* means no decoded data is available yet, but some may *new_len += did_read;
* become available later. This happens especially with else if (did_read != READENC_EAGAIN) {
* the bzip2 decoder, which needs an entire compressed
* block as input before it generates any output. */
if (did_read < 0) {
state = FINISHING; state = FINISHING;
break; break;
} }
*new_len += did_read;
} while (len || (did_read == BIG_READ)); } while (len || (did_read == BIG_READ));
if (state == FINISHING) shutdown_connection_stream(conn); if (state == FINISHING) shutdown_connection_stream(conn);