From 7e5e05ca608ee3abf1343a589857d1b8b1026984 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sat, 7 Jun 2008 21:27:37 +0300 Subject: [PATCH] Bug 517: read_encoded() == 0 might not mean EOF if non-blocking. Without this patch, ELinks showed garbage when bzip2 decompression was enabled. safe_read() in bzip2_read() did not see all of the body bytes that ELinks had received from the server. After bzip2_read() received EAGAIN from safe_read() and returned 0, something skipped 1460 bytes. decompress_data() apparently assumed that read_encoded() returning 0 meant the end of the file, and returned even though len still was nonzero, i.e. it had not yet written to the pipe all the data that the caller (read_chunked_http_data() or read_normal_http_data()) had provided. The caller did not know this, and discarded the data. --- src/encoding/encoding.c | 10 ++++++++-- src/protocol/http/http.c | 14 ++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/encoding/encoding.c b/src/encoding/encoding.c index 307bafd18..def3c806c 100644 --- a/src/encoding/encoding.c +++ b/src/encoding/encoding.c @@ -215,8 +215,14 @@ try_encoding_extensions(struct string *filename, int *fd) return ENCODING_NONE; } -/* Reads the file from @stream in chunks of size @readsize. */ -/* Returns a connection state. S_OK if all is well. */ +/** Reads the file from @a stream in chunks of size @a readsize. + * + * @a stream should be in blocking mode. If it is in non-blocking + * mode, this function can return an empty string in @a page just + * because no more data is available yet, and the caller cannot know + * whether the true end of the stream has been reached. + * + * @return a connection state. S_OK if all is well. 
*/ enum connection_state read_file(struct stream_encoded *stream, int readsize, struct string *page) { diff --git a/src/protocol/http/http.c b/src/protocol/http/http.c index 260754fbf..4131708b2 100644 --- a/src/protocol/http/http.c +++ b/src/protocol/http/http.c @@ -1005,7 +1005,8 @@ http_send_header(struct socket *socket) /* This function decompresses the data block given in @data (if it was * compressed), which is long @len bytes. The decompressed data block is given * back to the world as the return value and its length is stored into - * @new_len. + * @new_len. After this function returns, the caller will discard all the @len + * input bytes, so this function must use all of them unless an error occurs. * * In this function, value of either http->chunk_remaining or http->length is * being changed (it depends on if chunked mode is used or not). @@ -1100,11 +1101,16 @@ decompress_data(struct connection *conn, unsigned char *data, int len, did_read = read_encoded(conn->stream, output + *new_len, BIG_READ); - if (did_read > 0) *new_len += did_read; - else { - if (did_read < 0) state = FINISHING; + /* Do not break from the loop if did_read == 0. It + * means no decoded data is available yet, but some may + * become available later. This happens especially with + * the bzip2 decoder, which needs an entire compressed + * block as input before it generates any output. */ + if (did_read < 0) { + state = FINISHING; break; } + *new_len += did_read; } while (len || (did_read == BIG_READ)); if (state == FINISHING) shutdown_connection_stream(conn);