/* Stream reading and decoding (mostly decompression) */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include /* OS/2 needs this after sys/types.h */ #include #ifdef HAVE_FCNTL_H #include /* OS/2 needs this after sys/types.h */ #endif #ifdef HAVE_UNISTD_H #include #endif #include "elinks.h" #include "config/options.h" #include "encoding/encoding.h" #include "network/state.h" #include "osdep/osdep.h" #include "util/memory.h" #include "util/string.h" /************************************************************************* Dummy encoding (ENCODING_NONE) *************************************************************************/ struct dummy_enc_data { int fd; }; static int dummy_open(struct stream_encoded *stream, int fd) { stream->data = mem_alloc(sizeof(struct dummy_enc_data)); if (!stream->data) return -1; ((struct dummy_enc_data *) stream->data)->fd = fd; return 0; } static int dummy_read(struct stream_encoded *stream, char *data, int len) { return safe_read(((struct dummy_enc_data *) stream->data)->fd, data, len); } static char * dummy_decode_buffer(struct stream_encoded *stream, char *data, int len, int *new_len) { char *buffer = memacpy(data, len); if (!buffer) return NULL; *new_len = len; return buffer; } static void dummy_close(struct stream_encoded *stream) { close(((struct dummy_enc_data *) stream->data)->fd); mem_free(stream->data); } static const char *const dummy_extensions[] = { NULL }; static const struct decoding_backend dummy_decoding_backend = { "none", dummy_extensions, dummy_open, dummy_read, dummy_decode_buffer, dummy_close, }; /* Dynamic backend area */ #include "encoding/brotli.h" #include "encoding/bzip2.h" #include "encoding/gzip.h" #include "encoding/lzma.h" #include "encoding/zstd.h" static const struct decoding_backend *const decoding_backends[] = { &dummy_decoding_backend, &gzip_decoding_backend, &bzip2_decoding_backend, &lzma_decoding_backend, &brotli_decoding_backend, &zstd_decoding_backend }; /************************************************************************* Public functions *************************************************************************/ /* Associates encoded stream with a fd. */ struct stream_encoded * open_encoded(int fd, stream_encoding_T encoding) { struct stream_encoded *stream; stream = (struct stream_encoded *)mem_alloc(sizeof(*stream)); if (!stream) return NULL; stream->encoding = encoding; if (decoding_backends[stream->encoding]->eopen(stream, fd) >= 0) return stream; mem_free(stream); return NULL; } /* Read available data from stream and decode them. Note that when data change * their size during decoding, 'len' indicates desired size of _returned_ data, * not desired size of data read from stream. */ int read_encoded(struct stream_encoded *stream, char *data, int len) { return decoding_backends[stream->encoding]->eread(stream, data, len); } /* Decode an entire file from a buffer. This function is not suitable * for parts of files. @data contains the original data, @len bytes * long. The resulting decoded data chunk is *@new_len bytes long. */ char * decode_encoded_buffer(struct stream_encoded *stream, stream_encoding_T encoding, char *data, int len, int *new_len) { return decoding_backends[encoding]->decode_buffer(stream, data, len, new_len); } /* Closes encoded stream. Note that fd associated with the stream will be * closed here. */ void close_encoded(struct stream_encoded *stream) { decoding_backends[stream->encoding]->eclose(stream); mem_free(stream); } /* Return a list of extensions associated with that encoding. */ const char *const *listext_encoded(stream_encoding_T encoding) { return decoding_backends[encoding]->extensions; } stream_encoding_T guess_encoding(char *filename) { int fname_len = strlen(filename); char *fname_end = filename + fname_len; int enc; for (enc = 1; enc < ENCODINGS_KNOWN; enc++) { const char *const *ext = decoding_backends[enc]->extensions; while (ext && *ext) { int len = strlen(*ext); if (fname_len >= len && !strcmp(fname_end - len, *ext)) return enc; ext++; } } return ENCODING_NONE; } const char * get_encoding_name(stream_encoding_T encoding) { return decoding_backends[encoding]->name; } /* File reading */ /* Tries to open @prefixname with each of the supported encoding extensions * appended. */ static inline stream_encoding_T try_encoding_extensions(struct string *filename, int *fd) { int length = filename->length; int encoding; /* No file of that name was found, try some others names. */ for (encoding = 1; encoding < ENCODINGS_KNOWN; encoding++) { const char *const *ext = listext_encoded(encoding); for (; ext && *ext; ext++) { add_to_string(filename, *ext); /* We try with some extensions. */ *fd = open(filename->source, O_RDONLY | O_NOCTTY); if (*fd >= 0) /* Ok, found one, use it. */ return encoding; filename->source[length] = 0; filename->length = length; } } return ENCODING_NONE; } /** Reads the file from @a stream in chunks of size @a readsize. * * @a stream should be in blocking mode. If it is in non-blocking * mode, this function can return an empty string in @a page just * because no more data is available yet, and the caller cannot know * whether the true end of the stream has been reached. * * @return a connection state. S_OK if all is well. */ struct connection_state read_file(struct stream_encoded *stream, int readsize, struct string *page) { if (!init_string(page)) return connection_state(S_OUT_OF_MEM); /* We read with granularity of stt.st_size (given as @readsize) - this * does best job for uncompressed files, and doesn't hurt for * compressed ones anyway - very large files usually tend to inflate * fast anyway. At least I hope ;). --pasky */ /* Also there because of bug in Linux. Read returns -EACCES when * reading 0 bytes to invalid address so ensure never to try and * allocate zero number of bytes. */ if (!readsize) readsize = 4096; while (realloc_string(page, page->length + readsize)) { char *string_pos = page->source + page->length; int readlen = read_encoded(stream, string_pos, readsize); if (readlen < 0) { done_string(page); /* If it is some I/O error (and errno is set) that will * do. Since errno == 0 == S_WAIT and we cannot have * that. */ if (errno) return connection_state_for_errno(errno); /* FIXME: This is indeed an internal error. If readed from a * corrupted encoded file nothing or only some of the * data will be read. */ return connection_state(S_ENCODE_ERROR); } else if (readlen == 0) { /* NUL-terminate just in case */ page->source[page->length] = '\0'; return connection_state(S_OK); } page->length += readlen; #if 0 /* This didn't work so well as it should (I had to implement * end of stream handling to bzip2 anyway), so I rather * disabled this. */ if (readlen < readsize) { /* This is much safer. It should always mean that we * already read everything possible, and it permits us * more elegant of handling end of file with bzip2. */ break; } #endif } done_string(page); return connection_state(S_OUT_OF_MEM); } static inline int is_stdin_pipe(struct stat *stt, struct string *filename) { /* On Mac OS X, /dev/stdin has type S_IFSOCK. (bug 616) */ return !strlcmp(filename->source, filename->length, "/dev/stdin", 10) && ( #ifdef S_ISSOCK S_ISSOCK(stt->st_mode) || #endif S_ISFIFO(stt->st_mode)); } struct connection_state read_encoded_file(struct string *filename, struct string *page) { struct stream_encoded *stream; struct stat stt; stream_encoding_T encoding = ENCODING_NONE; int fd = open(filename->source, O_RDONLY | O_NOCTTY); struct connection_state state = connection_state_for_errno(errno); if (fd == -1 && get_opt_bool("protocol.file.try_encoding_extensions", NULL)) { encoding = try_encoding_extensions(filename, &fd); } else if (fd != -1) { encoding = guess_encoding(filename->source); } if (fd == -1) { #ifdef HAVE_SYS_CYGWIN_H /* There is no /dev/stdin on Cygwin. */ if (!strlcmp(filename->source, filename->length, "/dev/stdin", 10)) { fd = STDIN_FILENO; } else #endif return state; } /* Some file was opened so let's get down to bi'ness */ set_bin(fd); /* Do all the necessary checks before trying to read the file. * @state code is used to block further progress. */ if (fstat(fd, &stt)) { state = connection_state_for_errno(errno); } else if (!S_ISREG(stt.st_mode) && encoding != ENCODING_NONE) { /* We only want to open regular encoded files. */ /* Leave @state being the saved errno */ } else if (!S_ISREG(stt.st_mode) && !is_stdin_pipe(&stt, filename) && !get_opt_bool("protocol.file.allow_special_files", NULL)) { state = connection_state(S_FILE_TYPE); } else if (!(stream = open_encoded(fd, encoding))) { state = connection_state(S_OUT_OF_MEM); } else { int readsize = (int) stt.st_size; /* Check if st_size will cause overflow. */ /* FIXME: See bug 497 for info about support for big files. */ if (readsize != stt.st_size || readsize < 0) { #ifdef EFBIG state = connection_state_for_errno(EFBIG); #else state = connection_state(S_FILE_ERROR); #endif } else { state = read_file(stream, stt.st_size, page); } close_encoded(stream); } close(fd); return state; }