2005-09-15 09:58:31 -04:00
|
|
|
/* Stream reading and decoding (mostly decompression) */
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/stat.h> /* OS/2 needs this after sys/types.h */
|
|
|
|
#include <sys/types.h>
|
|
|
|
#ifdef HAVE_FCNTL_H
|
|
|
|
#include <fcntl.h> /* OS/2 needs this after sys/types.h */
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "elinks.h"
|
|
|
|
|
|
|
|
#include "config/options.h"
|
|
|
|
#include "encoding/encoding.h"
|
|
|
|
#include "network/state.h"
|
|
|
|
#include "osdep/osdep.h"
|
|
|
|
#include "util/memory.h"
|
|
|
|
#include "util/string.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************************************
|
|
|
|
Dummy encoding (ENCODING_NONE)
|
|
|
|
*************************************************************************/
|
|
|
|
|
|
|
|
/* Private state of the pass-through (ENCODING_NONE) backend. */
struct dummy_enc_data {
	int fd;	/* Descriptor the raw, undecoded data is read from. */
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
dummy_open(struct stream_encoded *stream, int fd)
|
|
|
|
{
|
|
|
|
stream->data = mem_alloc(sizeof(struct dummy_enc_data));
|
|
|
|
if (!stream->data) return -1;
|
|
|
|
|
|
|
|
((struct dummy_enc_data *) stream->data)->fd = fd;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2021-01-02 10:20:27 -05:00
|
|
|
dummy_read(struct stream_encoded *stream, char *data, int len)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
return safe_read(((struct dummy_enc_data *) stream->data)->fd, data, len);
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
/* "Decode" a buffer with the pass-through backend: hand back a copy of the
 * @len bytes at @data made with memacpy(). *@new_len is set to @len only
 * when the copy succeeded; on failure NULL is returned and *@new_len is
 * left untouched. */
static char *
dummy_decode_buffer(struct stream_encoded *stream, char *data, int len, int *new_len)
{
	char *copy = memacpy(data, len);

	if (copy) *new_len = len;

	return copy;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
dummy_close(struct stream_encoded *stream)
|
|
|
|
{
|
|
|
|
close(((struct dummy_enc_data *) stream->data)->fd);
|
|
|
|
mem_free(stream->data);
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
static const char *const dummy_extensions[] = { NULL };
|
2005-09-15 09:58:31 -04:00
|
|
|
|
2007-02-24 14:42:56 -05:00
|
|
|
static const struct decoding_backend dummy_decoding_backend = {
|
2005-09-15 09:58:31 -04:00
|
|
|
"none",
|
|
|
|
dummy_extensions,
|
|
|
|
dummy_open,
|
|
|
|
dummy_read,
|
|
|
|
dummy_decode_buffer,
|
|
|
|
dummy_close,
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Dynamic backend area */
|
|
|
|
|
2015-10-12 17:18:23 -04:00
|
|
|
#include "encoding/brotli.h"
|
2005-09-15 09:58:31 -04:00
|
|
|
#include "encoding/bzip2.h"
|
2021-12-23 09:39:42 -05:00
|
|
|
#include "encoding/gzip.h"
|
2006-03-24 06:30:54 -05:00
|
|
|
#include "encoding/lzma.h"
|
2019-07-14 12:54:39 -04:00
|
|
|
#include "encoding/zstd.h"
|
2005-09-15 09:58:31 -04:00
|
|
|
|
2007-02-24 14:42:56 -05:00
|
|
|
static const struct decoding_backend *const decoding_backends[] = {
|
2005-09-15 09:58:31 -04:00
|
|
|
&dummy_decoding_backend,
|
|
|
|
&gzip_decoding_backend,
|
|
|
|
&bzip2_decoding_backend,
|
2006-03-24 06:30:54 -05:00
|
|
|
&lzma_decoding_backend,
|
2015-10-12 17:18:23 -04:00
|
|
|
&brotli_decoding_backend,
|
2019-07-14 12:54:39 -04:00
|
|
|
&zstd_decoding_backend
|
2005-09-15 09:58:31 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************************************
|
|
|
|
Public functions
|
|
|
|
*************************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
/* Associates encoded stream with a fd. */
|
|
|
|
struct stream_encoded *
|
|
|
|
open_encoded(int fd, enum stream_encoding encoding)
|
|
|
|
{
|
|
|
|
struct stream_encoded *stream;
|
|
|
|
|
|
|
|
stream = mem_alloc(sizeof(*stream));
|
|
|
|
if (!stream) return NULL;
|
|
|
|
|
|
|
|
stream->encoding = encoding;
|
|
|
|
if (decoding_backends[stream->encoding]->open(stream, fd) >= 0)
|
|
|
|
return stream;
|
|
|
|
|
|
|
|
mem_free(stream);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Read available data from stream and decode them. Note that when data change
|
|
|
|
* their size during decoding, 'len' indicates desired size of _returned_ data,
|
|
|
|
* not desired size of data read from stream. */
|
|
|
|
int
|
2021-01-02 10:20:27 -05:00
|
|
|
read_encoded(struct stream_encoded *stream, char *data, int len)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
return decoding_backends[stream->encoding]->read(stream, data, len);
|
|
|
|
}
|
|
|
|
|
2007-02-24 14:35:29 -05:00
|
|
|
/* Decode an entire file from a buffer. This function is not suitable
|
|
|
|
* for parts of files. @data contains the original data, @len bytes
|
|
|
|
* long. The resulting decoded data chunk is *@new_len bytes long. */
|
2021-01-02 10:20:27 -05:00
|
|
|
char *
|
|
|
|
decode_encoded_buffer(struct stream_encoded *stream, enum stream_encoding encoding, char *data, int len,
|
2005-09-15 09:58:31 -04:00
|
|
|
int *new_len)
|
|
|
|
{
|
2010-09-24 10:12:35 -04:00
|
|
|
return decoding_backends[encoding]->decode_buffer(stream, data, len, new_len);
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Closes encoded stream. Note that fd associated with the stream will be
|
|
|
|
* closed here. */
|
|
|
|
void
|
|
|
|
close_encoded(struct stream_encoded *stream)
|
|
|
|
{
|
|
|
|
decoding_backends[stream->encoding]->close(stream);
|
|
|
|
mem_free(stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Return a list of extensions associated with that encoding. */
|
2021-01-02 10:20:27 -05:00
|
|
|
const char *const *listext_encoded(enum stream_encoding encoding)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
return decoding_backends[encoding]->extensions;
|
|
|
|
}
|
|
|
|
|
|
|
|
enum stream_encoding
|
2021-01-02 10:20:27 -05:00
|
|
|
guess_encoding(char *filename)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
int fname_len = strlen(filename);
|
2021-01-02 10:20:27 -05:00
|
|
|
char *fname_end = filename + fname_len;
|
2005-09-15 09:58:31 -04:00
|
|
|
int enc;
|
|
|
|
|
|
|
|
for (enc = 1; enc < ENCODINGS_KNOWN; enc++) {
|
2021-01-02 10:20:27 -05:00
|
|
|
const char *const *ext = decoding_backends[enc]->extensions;
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
while (ext && *ext) {
|
|
|
|
int len = strlen(*ext);
|
|
|
|
|
|
|
|
if (fname_len >= len && !strcmp(fname_end - len, *ext))
|
|
|
|
return enc;
|
|
|
|
|
|
|
|
ext++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ENCODING_NONE;
|
|
|
|
}
|
|
|
|
|
2021-01-02 10:20:27 -05:00
|
|
|
const char *
|
2005-09-15 09:58:31 -04:00
|
|
|
get_encoding_name(enum stream_encoding encoding)
|
|
|
|
{
|
|
|
|
return decoding_backends[encoding]->name;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* File reading */
|
|
|
|
|
|
|
|
/* Tries to open @prefixname with each of the supported encoding extensions
|
|
|
|
* appended. */
|
|
|
|
static inline enum stream_encoding
|
2019-04-21 06:27:40 -04:00
|
|
|
try_encoding_extensions(struct string *filename, int *fd)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
int length = filename->length;
|
|
|
|
int encoding;
|
|
|
|
|
|
|
|
/* No file of that name was found, try some others names. */
|
|
|
|
for (encoding = 1; encoding < ENCODINGS_KNOWN; encoding++) {
|
2021-01-02 10:20:27 -05:00
|
|
|
const char *const *ext = listext_encoded(encoding);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
for (; ext && *ext; ext++) {
|
|
|
|
add_to_string(filename, *ext);
|
|
|
|
|
|
|
|
/* We try with some extensions. */
|
|
|
|
*fd = open(filename->source, O_RDONLY | O_NOCTTY);
|
|
|
|
|
|
|
|
if (*fd >= 0)
|
|
|
|
/* Ok, found one, use it. */
|
|
|
|
return encoding;
|
|
|
|
|
|
|
|
filename->source[length] = 0;
|
|
|
|
filename->length = length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ENCODING_NONE;
|
|
|
|
}
|
|
|
|
|
2008-06-07 14:27:37 -04:00
|
|
|
/** Reads the file from @a stream in chunks of size @a readsize.
|
|
|
|
*
|
|
|
|
* @a stream should be in blocking mode. If it is in non-blocking
|
|
|
|
* mode, this function can return an empty string in @a page just
|
|
|
|
* because no more data is available yet, and the caller cannot know
|
|
|
|
* whether the true end of the stream has been reached.
|
|
|
|
*
|
|
|
|
* @return a connection state. S_OK if all is well. */
|
2008-08-03 08:24:26 -04:00
|
|
|
struct connection_state
|
2019-04-21 06:27:40 -04:00
|
|
|
read_file(struct stream_encoded *stream, int readsize, struct string *page)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
2008-08-03 08:24:26 -04:00
|
|
|
if (!init_string(page)) return connection_state(S_OUT_OF_MEM);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
/* We read with granularity of stt.st_size (given as @readsize) - this
|
|
|
|
* does best job for uncompressed files, and doesn't hurt for
|
|
|
|
* compressed ones anyway - very large files usually tend to inflate
|
|
|
|
* fast anyway. At least I hope ;). --pasky */
|
|
|
|
/* Also there because of bug in Linux. Read returns -EACCES when
|
|
|
|
* reading 0 bytes to invalid address so ensure never to try and
|
|
|
|
* allocate zero number of bytes. */
|
|
|
|
if (!readsize) readsize = 4096;
|
|
|
|
|
|
|
|
while (realloc_string(page, page->length + readsize)) {
|
2021-01-02 10:20:27 -05:00
|
|
|
char *string_pos = page->source + page->length;
|
2005-09-15 09:58:31 -04:00
|
|
|
int readlen = read_encoded(stream, string_pos, readsize);
|
|
|
|
|
|
|
|
if (readlen < 0) {
|
|
|
|
done_string(page);
|
|
|
|
|
|
|
|
/* If it is some I/O error (and errno is set) that will
|
|
|
|
* do. Since errno == 0 == S_WAIT and we cannot have
|
|
|
|
* that. */
|
|
|
|
if (errno)
|
2008-08-03 08:24:26 -04:00
|
|
|
return connection_state_for_errno(errno);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
/* FIXME: This is indeed an internal error. If readed from a
|
|
|
|
* corrupted encoded file nothing or only some of the
|
|
|
|
* data will be read. */
|
2008-08-03 08:24:26 -04:00
|
|
|
return connection_state(S_ENCODE_ERROR);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
} else if (readlen == 0) {
|
|
|
|
/* NUL-terminate just in case */
|
|
|
|
page->source[page->length] = '\0';
|
2008-08-03 08:24:26 -04:00
|
|
|
return connection_state(S_OK);
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
page->length += readlen;
|
|
|
|
#if 0
|
|
|
|
/* This didn't work so well as it should (I had to implement
|
|
|
|
* end of stream handling to bzip2 anyway), so I rather
|
|
|
|
* disabled this. */
|
|
|
|
if (readlen < readsize) {
|
|
|
|
/* This is much safer. It should always mean that we
|
|
|
|
* already read everything possible, and it permits us
|
|
|
|
* more elegant of handling end of file with bzip2. */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
done_string(page);
|
2008-08-03 08:24:26 -04:00
|
|
|
return connection_state(S_OUT_OF_MEM);
|
2005-09-15 09:58:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
2019-04-21 06:27:40 -04:00
|
|
|
is_stdin_pipe(struct stat *stt, struct string *filename)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
/* On Mac OS X, /dev/stdin has type S_IFSOCK. (bug 616) */
|
|
|
|
return !strlcmp(filename->source, filename->length, "/dev/stdin", 10)
|
|
|
|
&& (
|
|
|
|
#ifdef S_ISSOCK
|
|
|
|
S_ISSOCK(stt->st_mode) ||
|
|
|
|
#endif
|
|
|
|
S_ISFIFO(stt->st_mode));
|
|
|
|
}
|
|
|
|
|
2008-08-03 08:24:26 -04:00
|
|
|
struct connection_state
|
2019-04-21 06:27:40 -04:00
|
|
|
read_encoded_file(struct string *filename, struct string *page)
|
2005-09-15 09:58:31 -04:00
|
|
|
{
|
|
|
|
struct stream_encoded *stream;
|
|
|
|
struct stat stt;
|
|
|
|
enum stream_encoding encoding = ENCODING_NONE;
|
|
|
|
int fd = open(filename->source, O_RDONLY | O_NOCTTY);
|
2008-08-03 08:24:26 -04:00
|
|
|
struct connection_state state = connection_state_for_errno(errno);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
2007-08-28 12:41:18 -04:00
|
|
|
if (fd == -1 && get_opt_bool("protocol.file.try_encoding_extensions", NULL)) {
|
2005-09-15 09:58:31 -04:00
|
|
|
encoding = try_encoding_extensions(filename, &fd);
|
|
|
|
|
|
|
|
} else if (fd != -1) {
|
|
|
|
encoding = guess_encoding(filename->source);
|
|
|
|
}
|
|
|
|
|
2008-06-15 12:25:13 -04:00
|
|
|
if (fd == -1) {
|
|
|
|
#ifdef HAVE_SYS_CYGWIN_H
|
|
|
|
/* There is no /dev/stdin on Cygwin. */
|
|
|
|
if (!strlcmp(filename->source, filename->length, "/dev/stdin", 10)) {
|
|
|
|
fd = STDIN_FILENO;
|
|
|
|
} else
|
|
|
|
#endif
|
2005-09-15 09:58:31 -04:00
|
|
|
return state;
|
2008-06-15 12:25:13 -04:00
|
|
|
}
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
/* Some file was opened so let's get down to bi'ness */
|
|
|
|
set_bin(fd);
|
|
|
|
|
|
|
|
/* Do all the necessary checks before trying to read the file.
|
|
|
|
* @state code is used to block further progress. */
|
|
|
|
if (fstat(fd, &stt)) {
|
2008-08-03 08:24:26 -04:00
|
|
|
state = connection_state_for_errno(errno);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
} else if (!S_ISREG(stt.st_mode) && encoding != ENCODING_NONE) {
|
|
|
|
/* We only want to open regular encoded files. */
|
|
|
|
/* Leave @state being the saved errno */
|
|
|
|
|
|
|
|
} else if (!S_ISREG(stt.st_mode) && !is_stdin_pipe(&stt, filename)
|
2007-08-28 12:41:18 -04:00
|
|
|
&& !get_opt_bool("protocol.file.allow_special_files", NULL)) {
|
2008-08-03 08:24:26 -04:00
|
|
|
state = connection_state(S_FILE_TYPE);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
} else if (!(stream = open_encoded(fd, encoding))) {
|
2008-08-03 08:24:26 -04:00
|
|
|
state = connection_state(S_OUT_OF_MEM);
|
2005-09-15 09:58:31 -04:00
|
|
|
|
|
|
|
} else {
|
|
|
|
int readsize = (int) stt.st_size;
|
|
|
|
|
|
|
|
/* Check if st_size will cause overflow. */
|
|
|
|
/* FIXME: See bug 497 for info about support for big files. */
|
|
|
|
if (readsize != stt.st_size || readsize < 0) {
|
|
|
|
#ifdef EFBIG
|
2008-08-03 08:24:26 -04:00
|
|
|
state = connection_state_for_errno(EFBIG);
|
2005-09-15 09:58:31 -04:00
|
|
|
#else
|
2008-08-03 08:24:26 -04:00
|
|
|
state = connection_state(S_FILE_ERROR);
|
2005-09-15 09:58:31 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
} else {
|
|
|
|
state = read_file(stream, stt.st_size, page);
|
|
|
|
}
|
|
|
|
close_encoded(stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
return state;
|
|
|
|
}
|