1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00
elinks/src/encoding/encoding.c
2021-12-23 15:56:34 +01:00

363 lines
9.3 KiB
C

/* Stream reading and decoding (mostly decompression) */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h> /* OS/2 needs this after sys/types.h */
#include <sys/types.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h> /* OS/2 needs this after sys/types.h */
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "elinks.h"
#include "config/options.h"
#include "encoding/encoding.h"
#include "network/state.h"
#include "osdep/osdep.h"
#include "util/memory.h"
#include "util/string.h"
/*************************************************************************
Dummy encoding (ENCODING_NONE)
*************************************************************************/
/* Private state of the pass-through (ENCODING_NONE) backend. An instance is
 * allocated by dummy_open() and kept in stream->data. */
struct dummy_enc_data {
/* Descriptor given to dummy_open(); dummy_read() reads from it and
 * dummy_close() closes it. */
int fd;
};
static int
dummy_open(struct stream_encoded *stream, int fd)
{
stream->data = mem_alloc(sizeof(struct dummy_enc_data));
if (!stream->data) return -1;
((struct dummy_enc_data *) stream->data)->fd = fd;
return 0;
}
static int
dummy_read(struct stream_encoded *stream, char *data, int len)
{
return safe_read(((struct dummy_enc_data *) stream->data)->fd, data, len);
}
/* "Decode" a buffer that is not encoded at all: the result is a copy of the
 * @len bytes at @data made with memacpy().
 * On success the copy is returned and *@new_len is set to @len. If the copy
 * cannot be made, NULL is returned and *@new_len is left untouched.
 * @stream is not used. */
static char *
dummy_decode_buffer(struct stream_encoded *stream, char *data, int len, int *new_len)
{
	char *copy = memacpy(data, len);

	if (copy)
		*new_len = len;

	return copy;
}
static void
dummy_close(struct stream_encoded *stream)
{
close(((struct dummy_enc_data *) stream->data)->fd);
mem_free(stream->data);
}
/* The pass-through backend claims no file name extensions: the list holds
 * only its NULL terminator. */
static const char *const dummy_extensions[] = { NULL };
/* Backend descriptor for unencoded data. The initializer is positional; judging
 * by how the members are accessed in this file the entries are, in order: name,
 * extensions, open, read, decode_buffer, close.
 * NOTE(review): confirm the order against struct decoding_backend. */
static const struct decoding_backend dummy_decoding_backend = {
"none",
dummy_extensions,
dummy_open,
dummy_read,
dummy_decode_buffer,
dummy_close,
};
/* Dynamic backend area */
#include "encoding/brotli.h"
#include "encoding/bzip2.h"
#include "encoding/gzip.h"
#include "encoding/lzma.h"
#include "encoding/zstd.h"
/* Table of all decoding backends. It is indexed directly with enum
 * stream_encoding values throughout this file, so the order of the entries
 * has to follow the order of that enum.
 * The extension-scanning loops start at index 1, i.e. they skip the dummy
 * backend in slot 0 (presumably ENCODING_NONE == 0 -- confirm against the
 * enum declaration). */
static const struct decoding_backend *const decoding_backends[] = {
&dummy_decoding_backend,
&gzip_decoding_backend,
&bzip2_decoding_backend,
&lzma_decoding_backend,
&brotli_decoding_backend,
&zstd_decoding_backend
};
/*************************************************************************
Public functions
*************************************************************************/
/* Creates a stream for @encoding and lets the matching backend attach it to
 * @fd.
 * Returns the new stream, or NULL when either the stream allocation or the
 * backend's open() fails; in the failure case @fd is not closed here. */
struct stream_encoded *
open_encoded(int fd, enum stream_encoding encoding)
{
	struct stream_encoded *stream = mem_alloc(sizeof(*stream));

	if (!stream)
		return NULL;

	stream->encoding = encoding;
	if (decoding_backends[stream->encoding]->open(stream, fd) < 0) {
		/* The backend refused the stream; do not leak the shell. */
		mem_free(stream);
		return NULL;
	}

	return stream;
}
/* Read available data from stream and decode them. Note that when data change
* their size during decoding, 'len' indicates desired size of _returned_ data,
* not desired size of data read from stream. */
int
read_encoded(struct stream_encoded *stream, char *data, int len)
{
return decoding_backends[stream->encoding]->read(stream, data, len);
}
/* Decode an entire file from a buffer. This function is not suitable
* for parts of files. @data contains the original data, @len bytes
* long. The resulting decoded data chunk is *@new_len bytes long. */
char *
decode_encoded_buffer(struct stream_encoded *stream, enum stream_encoding encoding, char *data, int len,
int *new_len)
{
return decoding_backends[encoding]->decode_buffer(stream, data, len, new_len);
}
/* Closes encoded stream. Note that fd associated with the stream will be
* closed here. */
void
close_encoded(struct stream_encoded *stream)
{
decoding_backends[stream->encoding]->close(stream);
mem_free(stream);
}
/* Return a list of extensions associated with that encoding. */
const char *const *listext_encoded(enum stream_encoding encoding)
{
return decoding_backends[encoding]->extensions;
}
/* Guesses the encoding of a file from its name: the first backend (scanning
 * from index 1, so the dummy backend is skipped) that lists an extension
 * matching the tail of @filename wins. The comparison is case-sensitive.
 * Returns ENCODING_NONE when no extension matches. */
enum stream_encoding
guess_encoding(char *filename)
{
	int name_len = strlen(filename);
	int candidate;

	for (candidate = 1; candidate < ENCODINGS_KNOWN; candidate++) {
		const char *const *suffix = decoding_backends[candidate]->extensions;

		if (!suffix)
			continue;

		for (; *suffix; suffix++) {
			int suffix_len = strlen(*suffix);

			/* A suffix longer than the whole name cannot match. */
			if (suffix_len > name_len)
				continue;

			if (strcmp(filename + name_len - suffix_len, *suffix) == 0)
				return candidate;
		}
	}

	return ENCODING_NONE;
}
const char *
get_encoding_name(enum stream_encoding encoding)
{
return decoding_backends[encoding]->name;
}
/* File reading */

/* Tries to open @filename with each of the supported encoding extensions
 * appended, in backend order (the dummy backend is skipped).
 * On the first successful open() the descriptor is stored in *@fd, @filename
 * keeps the extension that worked, and the corresponding encoding is returned.
 * If nothing can be opened, @filename is cut back to its original length and
 * ENCODING_NONE is returned. */
static inline enum stream_encoding
try_encoding_extensions(struct string *filename, int *fd)
{
	const int base_length = filename->length;
	int candidate = 1;

	/* No file of that name was found, so probe the alternative names. */
	while (candidate < ENCODINGS_KNOWN) {
		const char *const *suffix = listext_encoded(candidate);

		while (suffix && *suffix) {
			add_to_string(filename, *suffix);

			*fd = open(filename->source, O_RDONLY | O_NOCTTY);
			if (*fd >= 0) {
				/* Found one; the caller gets the extended name. */
				return candidate;
			}

			/* Strip the suffix again before trying the next one. */
			filename->length = base_length;
			filename->source[base_length] = 0;
			suffix++;
		}

		candidate++;
	}

	return ENCODING_NONE;
}
/** Reads the file from @a stream in chunks of size @a readsize.
 *
 * @a stream should be in blocking mode.  If it is in non-blocking
 * mode, this function can return an empty string in @a page just
 * because no more data is available yet, and the caller cannot know
 * whether the true end of the stream has been reached.
 *
 * @a page is initialized here.  It holds the NUL-terminated data only when
 * S_OK is returned; on every error path it has already been released with
 * done_string().
 *
 * @return a connection state.  S_OK if all is well, S_OUT_OF_MEM if @a page
 * could not be set up or grown, the state matching errno if the read failed
 * with errno set, and S_ENCODE_ERROR if the read failed without setting
 * errno. */
struct connection_state
read_file(struct stream_encoded *stream, int readsize, struct string *page)
{
	if (!init_string(page)) return connection_state(S_OUT_OF_MEM);

	/* We read with granularity of stt.st_size (given as @readsize) - this
	 * does best job for uncompressed files, and doesn't hurt for
	 * compressed ones anyway - very large files usually tend to inflate
	 * fast anyway. At least I hope ;). --pasky */
	/* Also there because of bug in Linux. Read returns -EACCES when
	 * reading 0 bytes to invalid address so ensure never to try and
	 * allocate zero number of bytes. */
	if (!readsize) readsize = 4096;

	while (realloc_string(page, page->length + readsize)) {
		char *string_pos = page->source + page->length;
		int readlen;

		/* Start from a clean errno: a decoder may fail without any
		 * system call failing, and a value left over from earlier code
		 * must not be mistaken for the cause of this failure. */
		errno = 0;
		readlen = read_encoded(stream, string_pos, readsize);

		if (readlen < 0) {
			/* Grab errno before cleaning up; releasing the string
			 * may overwrite it. */
			int saved_errno = errno;

			done_string(page);

			/* If it is some I/O error (and errno is set) that will
			 * do. Since errno == 0 == S_WAIT and we cannot have
			 * that. */
			if (saved_errno)
				return connection_state_for_errno(saved_errno);

			/* FIXME: This is indeed an internal error. When reading
			 * from a corrupted encoded file, nothing or only some
			 * of the data will be read. */
			return connection_state(S_ENCODE_ERROR);
		}

		if (readlen == 0) {
			/* NUL-terminate just in case */
			page->source[page->length] = '\0';
			return connection_state(S_OK);
		}

		/* A short read (readlen < readsize) is deliberately not taken
		 * as end of stream; only a zero-length read ends the loop. */
		page->length += readlen;
	}

	/* Growing @page failed. */
	done_string(page);
	return connection_state(S_OUT_OF_MEM);
}
/* Tells whether @filename is "/dev/stdin" and @stt describes a FIFO or, where
 * S_ISSOCK is available, a socket. Returns 1 if so, 0 otherwise. */
static inline int
is_stdin_pipe(struct stat *stt, struct string *filename)
{
	if (strlcmp(filename->source, filename->length, "/dev/stdin", 10) != 0)
		return 0;

#ifdef S_ISSOCK
	/* On Mac OS X, /dev/stdin has type S_IFSOCK. (bug 616) */
	if (S_ISSOCK(stt->st_mode))
		return 1;
#endif

	return S_ISFIFO(stt->st_mode) ? 1 : 0;
}
/* Opens @filename, works out its encoding and reads the decoded contents into
 * @page.
 *
 * If the plain name cannot be opened and the
 * "protocol.file.try_encoding_extensions" option is set, the name is retried
 * with each known encoding extension appended (@filename is modified when one
 * of them works). If the plain name can be opened, the encoding is guessed
 * from its extension.
 *
 * Only regular files are read, except that /dev/stdin backed by a pipe or
 * socket is accepted, as is any unencoded special file when
 * "protocol.file.allow_special_files" is set.
 *
 * Returns the resulting connection state; @page is only initialized (by
 * read_file()) when reading was actually attempted. The descriptor opened
 * here is always closed before returning, exactly once. */
struct connection_state
read_encoded_file(struct string *filename, struct string *page)
{
	struct stream_encoded *stream;
	struct stat stt;
	enum stream_encoding encoding = ENCODING_NONE;
	int fd = open(filename->source, O_RDONLY | O_NOCTTY);
	/* Remember why the plain open() failed, before later calls overwrite
	 * errno.
	 * NOTE(review): when open() succeeded this captures whatever errno
	 * happened to hold, and that value is what gets returned for a
	 * non-regular file with a recognized encoding extension. */
	struct connection_state state = connection_state_for_errno(errno);

	if (fd == -1 && get_opt_bool("protocol.file.try_encoding_extensions", NULL)) {
		encoding = try_encoding_extensions(filename, &fd);
	} else if (fd != -1) {
		encoding = guess_encoding(filename->source);
	}

	if (fd == -1) {
#ifdef HAVE_SYS_CYGWIN_H
		/* There is no /dev/stdin on Cygwin. */
		if (!strlcmp(filename->source, filename->length, "/dev/stdin", 10)) {
			fd = STDIN_FILENO;
		} else
#endif
		return state;
	}

	/* Some file was opened so let's get down to bi'ness */
	set_bin(fd);

	/* Do all the necessary checks before trying to read the file.
	 * @state code is used to block further progress. */
	if (fstat(fd, &stt)) {
		state = connection_state_for_errno(errno);

	} else if (!S_ISREG(stt.st_mode) && encoding != ENCODING_NONE) {
		/* We only want to open regular encoded files. */
		/* Leave @state being the saved errno */

	} else if (!S_ISREG(stt.st_mode) && !is_stdin_pipe(&stt, filename)
		   && !get_opt_bool("protocol.file.allow_special_files", NULL)) {
		state = connection_state(S_FILE_TYPE);

	} else if (!(stream = open_encoded(fd, encoding))) {
		/* The stream never took over @fd; it is closed below. */
		state = connection_state(S_OUT_OF_MEM);

	} else {
		int readsize = (int) stt.st_size;

		/* Check if st_size will cause overflow. */
		/* FIXME: See bug 497 for info about support for big files. */
		if (readsize != stt.st_size || readsize < 0) {
#ifdef EFBIG
			state = connection_state_for_errno(EFBIG);
#else
			state = connection_state(S_FILE_ERROR);
#endif

		} else {
			state = read_file(stream, readsize, page);
		}

		/* close_encoded() closes the descriptor that belongs to the
		 * stream, so forget it here. Closing it a second time would
		 * be an error at best, and at worst would hit an unrelated
		 * descriptor that has reused the number. */
		close_encoded(stream);
		fd = -1;
	}

	if (fd != -1)
		close(fd);

	return state;
}