/* sbase/tar.c */
/* See LICENSE file for copyright and license details. */
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#ifndef major
#include <sys/sysmacros.h>
#endif

#include <grp.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "fs.h"
#include "util.h"

/*
 * Layout of a ustar header record. Every field is a fixed-width character
 * array; the numeric fields hold octal text (written with putoctal(), read
 * back with strtoul(..., 8)). The members add up to 500 bytes and the
 * struct is overlaid on a BLKSIZ-byte buffer by the code below.
 */
struct header {
	char name[100];   /* member path, or its tail when prefix is used */
	char mode[8];     /* permission bits */
	char uid[8];      /* owner user id */
	char gid[8];      /* owner group id */
	char size[12];    /* number of data bytes following the header */
	char mtime[12];   /* modification time in seconds */
	char chksum[8];   /* byte sum of the header with this field as spaces */
	char type;        /* one of enum Type */
	char link[100];   /* target of a hard or symbolic link */
	char magic[6];    /* "ustar" including its terminating NUL */
	char version[2];  /* "00", not NUL-terminated */
	char uname[32];   /* owner user name */
	char gname[32];   /* owner group name */
	char major[8];    /* device major number for CHARDEV/BLOCKDEV */
	char minor[8];    /* device minor number for CHARDEV/BLOCKDEV */
	char prefix[155]; /* leading path components, joined to name with '/' */
};
/* Size in bytes of one archive block; headers and data are moved in this unit. */
#define BLKSIZ 512

/* Values of the header's type byte, one per kind of archive member. */
enum Type {
	REG       = '0',
	AREG      = '\0', /* handled exactly like REG on extraction */
	HARDLINK  = '1',
	SYMLINK   = '2',
	CHARDEV   = '3',
	BLOCKDEV  = '4',
	DIRECTORY = '5',
	FIFO      = '6'
};

static FILE *tarfile;   /* archive stream currently being read or written */
static ino_t tarinode;  /* inode of the output archive, so it is not archived into itself */
static dev_t tardev;    /* device of the output archive, compared together with tarinode */

static int mflag;       /* set by -m: leave modification times alone when extracting */
static char filtermode; /* 'j' or 'z' when a decompression filter was requested, else 0 */

static FILE *
decomp(FILE *fp)
2013-07-18 11:15:35 -04:00
{
2015-02-16 13:47:36 -05:00
int fds[2];
if (pipe(fds) < 0)
eprintf("pipe:");
switch (fork()) {
case -1:
eprintf("fork:");
case 0:
dup2(fileno(fp), 0);
dup2(fds[1], 1);
close(fds[0]);
close(fds[1]);
switch (filtermode) {
case 'j':
execlp("bzip2", "bzip2", "-cd", NULL);
weprintf("execlp bzip2:");
_exit(1);
case 'z':
execlp("gzip", "gzip", "-cd", NULL);
weprintf("execlp gzip:");
_exit(1);
2013-07-18 11:15:35 -04:00
}
}
close(fds[1]);
return fdopen(fds[0], "r");
2013-07-18 11:15:35 -04:00
}
/*
 * Write num into dst as zero-padded octal text sized for a header field of
 * n bytes: n - 1 digits followed by a terminating NUL. snprintf() cuts the
 * text off after n - 1 characters if num needs more digits than that.
 */
static void
putoctal(char *dst, unsigned num, int n)
{
	snprintf(dst, n, "%.*o", n - 1, num);
}
static int
archive(const char* path)
2013-07-18 11:15:35 -04:00
{
2015-03-03 05:26:59 -05:00
FILE *f = NULL;
2013-07-18 11:15:35 -04:00
mode_t mode;
2015-02-16 13:47:36 -05:00
struct group *gr;
struct header *h;
struct passwd *pw;
struct stat st;
2015-03-03 05:26:59 -05:00
size_t chksum, x;
ssize_t l;
2015-02-16 13:47:36 -05:00
unsigned char b[BLKSIZ];
2013-07-18 11:15:35 -04:00
lstat(path, &st);
if (st.st_ino == tarinode && st.st_dev == tardev) {
fprintf(stderr, "ignoring '%s'\n", path);
return 0;
}
2013-07-18 11:15:35 -04:00
pw = getpwuid(st.st_uid);
gr = getgrgid(st.st_gid);
2015-02-16 13:47:36 -05:00
h = (void*)b;
memset(b, 0, sizeof(b));
snprintf(h->name, sizeof(h->name), "%s", path);
putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode));
putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid));
putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid));
putoctal(h->size, 0, sizeof(h->size));
putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime));
memcpy(h->magic, "ustar", sizeof(h->magic));
memcpy(h->version, "00", sizeof(h->version));
snprintf(h->uname, sizeof h->uname, "%s", pw ? pw->pw_name : "");
snprintf(h->gname, sizeof h->gname, "%s", gr ? gr->gr_name : "");
2013-07-18 11:15:35 -04:00
mode = st.st_mode;
if (S_ISREG(mode)) {
2013-07-18 11:15:35 -04:00
h->type = REG;
putoctal(h->size, (unsigned)st.st_size, sizeof h->size);
f = fopen(path, "r");
} else if (S_ISDIR(mode)) {
2013-07-18 11:15:35 -04:00
h->type = DIRECTORY;
} else if (S_ISLNK(mode)) {
2013-07-18 11:15:35 -04:00
h->type = SYMLINK;
readlink(path, h->link, (sizeof h->link)-1);
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2013-07-18 11:15:35 -04:00
h->type = S_ISCHR(mode) ? CHARDEV : BLOCKDEV;
#if defined(major) && defined(minor)
2013-07-18 11:15:35 -04:00
putoctal(h->major, (unsigned)major(st.st_dev), sizeof h->major);
putoctal(h->minor, (unsigned)minor(st.st_dev), sizeof h->minor);
#else
return 0;
#endif
} else if (S_ISFIFO(mode)) {
2013-07-18 11:15:35 -04:00
h->type = FIFO;
}
memset(h->chksum, ' ', sizeof h->chksum);
for (x = 0, chksum = 0; x < sizeof *h; x++)
2013-07-18 11:15:35 -04:00
chksum += b[x];
putoctal(h->chksum, chksum, sizeof h->chksum);
2015-02-16 13:47:36 -05:00
fwrite(b, BLKSIZ, 1, tarfile);
if (!f)
2013-07-18 11:15:35 -04:00
return 0;
2015-02-16 13:47:36 -05:00
while ((l = fread(b, 1, BLKSIZ, f)) > 0) {
if (l < BLKSIZ)
memset(b+l, 0, BLKSIZ-l);
fwrite(b, BLKSIZ, 1, tarfile);
2013-07-18 11:15:35 -04:00
}
fclose(f);
2013-07-19 12:05:28 -04:00
return 0;
2013-07-18 11:15:35 -04:00
}
static int
2015-02-16 13:47:36 -05:00
unarchive(char *fname, int l, char b[BLKSIZ])
2013-07-18 11:15:35 -04:00
{
FILE *f = NULL;
struct timeval times[2];
2015-02-16 13:47:36 -05:00
struct header *h = (void*)b;
unsigned long mode, major, minor, type, mtime;
char lname[101];
2013-07-18 11:15:35 -04:00
if (!mflag)
mtime = strtoul(h->mtime, 0, 8);
2013-07-18 11:15:35 -04:00
unlink(fname);
switch (h->type) {
2013-07-18 11:15:35 -04:00
case REG:
case AREG:
2013-07-18 11:15:35 -04:00
mode = strtoul(h->mode, 0, 8);
if (!(f = fopen(fname, "w")) || chmod(fname, mode))
2013-07-18 11:15:35 -04:00
perror(fname);
break;
case HARDLINK:
case SYMLINK:
snprintf(lname, sizeof lname, "%s", h->link);
if (!((h->type == HARDLINK) ? link : symlink)(lname, fname))
2013-07-18 11:15:35 -04:00
perror(fname);
break;
case DIRECTORY:
mode = strtoul(h->mode, 0, 8);
if (mkdir(fname, (mode_t)mode))
2013-07-18 11:15:35 -04:00
perror(fname);
break;
case CHARDEV:
case BLOCKDEV:
#ifdef makedev
2013-07-18 11:15:35 -04:00
mode = strtoul(h->mode, 0, 8);
major = strtoul(h->major, 0, 8);
minor = strtoul(h->mode, 0, 8);
type = (h->type == CHARDEV) ? S_IFCHR : S_IFBLK;
if (mknod(fname, type | mode, makedev(major, minor)))
2013-07-18 11:15:35 -04:00
perror(fname);
#endif
2013-07-18 11:15:35 -04:00
break;
case FIFO:
mode = strtoul(h->mode, 0, 8);
if (mknod(fname, S_IFIFO | mode, 0))
2013-07-18 11:15:35 -04:00
perror(fname);
break;
default:
fprintf(stderr, "usupported tarfiletype %c\n", h->type);
}
2015-02-16 13:47:36 -05:00
if (getuid() == 0 && chown(fname, strtoul(h->uid, 0, 8), strtoul(h->gid, 0, 8)))
2013-07-18 11:15:35 -04:00
perror(fname);
2015-02-16 13:47:36 -05:00
for (; l > 0; l -= BLKSIZ) {
fread(b, BLKSIZ, 1, tarfile);
if (f)
2013-07-18 11:15:35 -04:00
fwrite(b, MIN(l, 512), 1, f);
}
if (f)
2013-07-18 11:15:35 -04:00
fclose(f);
if (!mflag) {
times[0].tv_sec = times[1].tv_sec = mtime;
times[0].tv_usec = times[1].tv_usec = 0;
if (utimes(fname, times))
perror(fname);
}
2013-07-18 11:15:35 -04:00
return 0;
}
static int
2015-02-16 13:47:36 -05:00
print(char * fname, int l, char b[BLKSIZ])
2013-07-18 11:15:35 -04:00
{
puts(fname);
2015-02-16 13:47:36 -05:00
for (; l > 0; l -= BLKSIZ)
fread(b, BLKSIZ, 1, tarfile);
2013-07-18 11:15:35 -04:00
return 0;
}
/*
 * Callback installed as the recursor's fn when creating an archive: store
 * path itself, then descend into it if st says it is a directory.
 * st may be NULL, in which case nothing is descended into. data is unused.
 */
static void
c(const char *path, struct stat *st, void *data, struct recursor *r)
{
	archive(path);

	/* recurse() must only be called on directories */
	if (st && S_ISDIR(st->st_mode))
		recurse(path, NULL, r);
}
static void
xt(int (*fn)(char *, int, char[BLKSIZ]))
2013-07-18 11:15:35 -04:00
{
2015-02-16 13:47:36 -05:00
char b[BLKSIZ], fname[257], *s;
struct header *h = (void*)b;
2013-07-18 11:15:35 -04:00
2015-02-16 13:47:36 -05:00
while (fread(b, BLKSIZ, 1, tarfile) && h->name[0] != '\0') {
s = fname;
if (h->prefix[0] != '\0')
s += sprintf(s, "%.*s/", (int)sizeof h->prefix, h->prefix);
sprintf(s, "%.*s", (int)sizeof h->name, h->name);
2013-07-18 11:15:35 -04:00
fn(fname, strtol(h->size, 0, 8), b);
}
}
/* Report the command-line synopsis through eprintf(). */
static void
usage(void)
{
	eprintf("usage: tar [-f tarfile] [-C dir] -j|z -x[m]|t\n"
	        " tar [-f tarfile] [-C dir] -c dir\n");
}
int
main(int argc, char *argv[])
{
FILE *fp;
struct recursor r = { .fn = c, .hist = NULL, .depth = 0, .follow = 'P', .flags = 0};
2015-02-16 13:47:36 -05:00
struct stat st;
char *file = NULL, *dir = ".", mode = '\0';
ARGBEGIN {
case 'x':
case 'c':
case 't':
if (mode)
usage();
mode = ARGC();
break;
case 'C':
dir = EARGF(usage());
break;
case 'f':
file = EARGF(usage());
break;
case 'm':
mflag = 1;
break;
case 'j':
case 'z':
if (filtermode)
usage();
filtermode = ARGC();
break;
case 'h':
r.follow = 'L';
break;
default:
usage();
} ARGEND;
if (!mode || argc != (mode == 'c'))
usage();
switch (mode) {
case 'c':
if (file) {
if (!(fp = fopen(file, "w")))
eprintf("fopen %s:", file);
if (lstat(file, &st) < 0)
eprintf("tar: stat '%s':", file);
tarinode = st.st_ino;
tardev = st.st_dev;
tarfile = fp;
} else {
tarfile = stdout;
}
chdir(dir);
Refactor recurse() again Okay, why yet another recurse()-refactor? The last one added the recursor-struct, which simplified things on the user-end, but there was still one thing that bugged me a lot: Previously, all fn()'s were forced to (l)stat the paths themselves. This does not work well when you try to keep up with H-, L- and P- flags at the same time, as each utility-function would have to set the right function-pointer for (l)stat every single time. This is not desirable. Furthermore, recurse should be easy to use and not involve trouble finding the right (l)stat-function to do it right. So, what we needed was a stat-argument for each fn(), so it is directly accessible. This was impossible to do though when the fn()'s are still directly called by the programs to "start" the recurse. Thus, the fundamental change is to make recurse() the function to go, while designing the fn()'s in a way they can "live" with st being NULL (we don't want a null-pointer-deref). What you can see in this commit is the result of this work. Why all this trouble instead of using nftw? The special thing about recurse() is that you tell the function when to recurse() in your fn(). You don't need special flags to tell nftw() to skip the subtree, just to give an example. The only single downside to this is that now, you are not allowed to unconditionally call recurse() from your fn(). It has to be a directory. However, that is a cost I think is easily weighed up by the advantages. Another thing is the history: I added a procedure at the end of the outmost recurse to free the history. This way we don't leak memory. A simple optimization on the side: - if (h->dev == st.st_dev && h->ino == st.st_ino) + if (h->ino == st.st_ino && h->dev == st.st_dev) First compare the likely difference in inode-numbers instead of checking the unlikely condition that the device-numbers are different.
2015-03-18 19:53:42 -04:00
recurse(argv[0], NULL, &r);
break;
case 't':
case 'x':
if (file) {
if (!(fp = fopen(file, "r")))
eprintf("fopen %s:", file);
} else {
fp = stdin;
}
switch (filtermode) {
case 'j':
case 'z':
tarfile = decomp(fp);
break;
default:
tarfile = fp;
break;
}
chdir(dir);
xt(mode == 'x' ? unarchive : print);
break;
}
return recurse_status;
}