diff --git a/NEWS b/NEWS index 8f0000ff..d326dcef 100644 --- a/NEWS +++ b/NEWS @@ -38,6 +38,8 @@ Bugs that should be removed from NEWS before the 0.12.0 release: was the first release that had these bugs. * bug 1033: Fix memory leak in ECMAScript window.open. ELinks 0.12pre1 was the first release that had this bug. +* bug 1034: ``Content-Encoding: deflate'' allows a zlib header as + specified in RFC 2616. * Global ECMAScript functions alert, open, and setTimeout again work with SEE. ELinks 0.12pre1 was the first release that supported SEE at all. diff --git a/src/encoding/deflate.c b/src/encoding/deflate.c index f4b97f2a..aae2e299 100644 --- a/src/encoding/deflate.c +++ b/src/encoding/deflate.c @@ -125,6 +125,26 @@ deflate_read(struct stream_encoded *stream, unsigned char *buf, int len) restart: err = inflate(&data->deflate_stream, Z_SYNC_FLUSH); if (err == Z_DATA_ERROR && !data->after_first_read) { + /* RFC 2616 requires a zlib header for + * "Content-Encoding: deflate", but some HTTP + * servers (Microsoft-IIS/6.0 at blogs.msdn.com, + * and reportedly Apache with mod_deflate) omit + * that, causing Z_DATA_ERROR. Clarification of + * the term "deflate" has been requested for the + * next version of HTTP: + * http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73 + * + * Try to recover by telling zlib not to expect + * the header. If the error does not happen on + * the first inflate() call, then it is too late + * to recover because ELinks may already have + * discarded part of the input data. + * + * TODO: This fallback to raw DEFLATE is currently + * enabled for "Content-Encoding: gzip" too. It + * might be better to fall back to no compression + * at all, because Apache can send that header for + * uncompressed *.gz.md5 files. */ data->after_first_read = 1; inflateEnd(&data->deflate_stream); data->deflate_stream.avail_out = len; diff --git a/test/cgi/chunked_deflate.py b/test/cgi/chunked_deflate.py index 5300c575..45f06828 100755 --- a/test/cgi/chunked_deflate.py +++ b/test/cgi/chunked_deflate.py @@ -1,11 +1,9 @@ #!/usr/bin/env python -import os, time -from zlib import * +import os, time, zlib data1 = 'Two lines should be visible.
The second line.' -ob = compressobj(Z_DEFAULT_COMPRESSION, DEFLATED, -MAX_WBITS) -cd1 = ob.compress(data1) -cd1 += ob.flush() +cd1 = zlib.compress(data1) + length = len(cd1) next_chunk = hex(length - 10)[2:] diff --git a/test/cgi/chunked_raw_deflate.py b/test/cgi/chunked_raw_deflate.py new file mode 100755 index 00000000..6a762636 --- /dev/null +++ b/test/cgi/chunked_raw_deflate.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +import os, time +from zlib import * + +# According to section 3.5 of RFC 2616, "Content-Encoding: deflate" +# requires a ZLIB header. However, Microsoft-IIS/6.0 sends a raw +# DEFLATE stream instead. This CGI tests how ELinks handles that. + +data1 = 'Two lines should be visible.
The second line.' +ob = compressobj(Z_DEFAULT_COMPRESSION, DEFLATED, -MAX_WBITS) +cd1 = ob.compress(data1) +cd1 += ob.flush() +length = len(cd1) +next_chunk = hex(length - 10)[2:] + +os.write(1, "Date: Sun, 20 Jan 2008 15:24:00 GMT\r\nServer: ddd\r\nTransfer-Encoding: chunked\r\nContent-Encoding: deflate\r\nConnection: close\r\nContent-Type: text/html; charset=ISO-8859-1\r\n") +os.write(1, "\r\na\r\n") +os.write(1, cd1[:10]) +time.sleep(2) +os.write(1, "\r\n%s\r\n" % next_chunk) +os.write(1, cd1[10:]) +os.write(1, "\r\n0\r\n")