2011-05-24 19:24:33 -04:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
|
|
|
#include <dirent.h>
|
2015-03-11 18:21:52 -04:00
|
|
|
#include <errno.h>
|
2014-01-30 07:37:35 -05:00
|
|
|
#include <limits.h>
|
|
|
|
#include <stdio.h>
|
2011-05-24 19:24:33 -04:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2011-06-04 09:30:54 -04:00
|
|
|
#include <sys/stat.h>
|
2014-01-30 07:37:35 -05:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <unistd.h>
|
2013-03-05 15:46:48 -05:00
|
|
|
|
2015-03-12 19:25:32 -04:00
|
|
|
#include "../fs.h"
|
2011-05-24 19:24:33 -04:00
|
|
|
#include "../util.h"
|
|
|
|
|
2015-03-12 19:25:32 -04:00
|
|
|
/* set to 1 by recurse() whenever it reports a failure (which it only does
 * when the SILENT flag is not set); never cleared in this file */
int recurse_status = 0;
|
2015-03-02 15:43:56 -05:00
|
|
|
|
2011-05-24 19:24:33 -04:00
|
|
|
void
|
2015-03-12 19:25:32 -04:00
|
|
|
recurse(const char *path, void *data, struct recursor *r)
|
2011-05-24 19:24:33 -04:00
|
|
|
{
|
|
|
|
struct dirent *d;
|
2015-03-12 19:25:32 -04:00
|
|
|
struct history *new, *h;
|
|
|
|
struct stat st, dst;
|
2011-05-24 19:24:33 -04:00
|
|
|
DIR *dp;
|
2015-03-12 19:25:32 -04:00
|
|
|
int (*statf)(const char *, struct stat *);
|
|
|
|
char subpath[PATH_MAX], *statf_name;
|
2011-05-24 19:24:33 -04:00
|
|
|
|
2015-03-12 19:25:32 -04:00
|
|
|
if (r->follow == 'P' || (r->follow == 'H' && r->depth)) {
|
|
|
|
statf_name = "lstat";
|
|
|
|
statf = lstat;
|
|
|
|
} else {
|
|
|
|
statf_name = "stat";
|
|
|
|
statf = stat;
|
2015-03-11 18:21:52 -04:00
|
|
|
}
|
2015-03-12 19:25:32 -04:00
|
|
|
|
|
|
|
if (statf(path, &st) < 0) {
|
2015-04-19 08:00:47 -04:00
|
|
|
if (!(r->flags & SILENT)) {
|
|
|
|
weprintf("%s %s:", statf_name, path);
|
|
|
|
recurse_status = 1;
|
|
|
|
}
|
2015-03-11 18:21:52 -04:00
|
|
|
return;
|
|
|
|
}
|
2015-04-18 16:04:49 -04:00
|
|
|
if (!S_ISDIR(st.st_mode)) {
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
(r->fn)(path, &st, data, r);
|
2011-06-04 09:30:54 -04:00
|
|
|
return;
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
}
|
2014-06-30 10:58:00 -04:00
|
|
|
|
2015-03-12 19:25:32 -04:00
|
|
|
new = emalloc(sizeof(struct history));
|
|
|
|
new->prev = r->hist;
|
|
|
|
r->hist = new;
|
|
|
|
new->dev = st.st_dev;
|
|
|
|
new->ino = st.st_ino;
|
|
|
|
|
|
|
|
for (h = new->prev; h; h = h->prev)
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
if (h->ino == st.st_ino && h->dev == st.st_dev)
|
2015-03-12 19:25:32 -04:00
|
|
|
return;
|
|
|
|
|
Audit tar(1), add DIRFIRST-flag to recurse()
I've been wanting to do this for a while now, as tar(1) used to
be one of messiest and cruftiest tools.
First off, before walking through the audit, I'll talk about
what the DIRFIRST-flag for recurse() does.
It basically calls fn() on the first-level-dir before calling
it's subentries. It's necessary here, because else the order
of the tar-files would've been wrong (it would try to create
dir/file before creating dir/).
Now, to the audit:
1) Update manpage, fix mistake that compression is also available
for compressing. It's only available for extracting.
2) Define the major, minor and makedev macros from glibc by ourselves.
No need to rely on them, as they are common sense.
decomp()
3) Simple refactorization.
putoctal()
4) Add a truncation check for snprintf().
archive()
5) BUGFIX: Add checks to any checkable function, don't blindly call
them, this is harmful and there are 100 ways to exploit that.
6) Use estrlcpy() instead of snprintf() wherever possible, fix
alignment.
7) BUGFIX: Terminate the result-buffer of readlink(), check if
it even succeeded.
8) Fix sizeof()-formatting.
unarchive()
9) BUGFIX: Add checks to any checkable function, don't blindly call
them, this is harmful and there are 100 ways to exploit that.
10) BUGFIX: strtoul can happily return negative numbers. Add checks
for that and also if the full string has been processed.
11) Remove calls to perror(). We have eprintf, use it.
12) BUGFIX: "minor = strtoul(h->mode, 0, 8);". We need h->minor of
course.
13) Fix typo "usupported", remove fprintf-call.
print()
14) Check fread().
xt()
15) Get rid of snprintf-magic. Use estrlcat().
16) BUGFIX: check for ferror() on the tarfile.
usage()
17) Update it. The old usage() was like 1000 years old.
main()
18) Add DIRFIRST-flag to the recursor.
19) Don't print usage() when a mode is re-set. We allow this in
general.
20) Add function checks and fix error messages.
21) Add tarfilename-global for proper error-messages.
2015-03-20 20:03:35 -04:00
|
|
|
if (!r->depth && (r->flags & DIRFIRST))
|
|
|
|
(r->fn)(path, &st, data, r);
|
|
|
|
|
2015-04-18 16:04:49 -04:00
|
|
|
if (!r->maxdepth || r->depth + 1 < r->maxdepth) {
|
2017-10-01 04:39:27 -04:00
|
|
|
if (!(dp = opendir(path))) {
|
|
|
|
if (!(r->flags & SILENT)) {
|
|
|
|
weprintf("opendir %s:", path);
|
|
|
|
recurse_status = 1;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2015-04-18 16:04:49 -04:00
|
|
|
while ((d = readdir(dp))) {
|
|
|
|
if (r->follow == 'H') {
|
|
|
|
statf_name = "lstat";
|
|
|
|
statf = lstat;
|
|
|
|
}
|
|
|
|
if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
|
|
|
|
continue;
|
|
|
|
estrlcpy(subpath, path, sizeof(subpath));
|
|
|
|
if (path[strlen(path) - 1] != '/')
|
|
|
|
estrlcat(subpath, "/", sizeof(subpath));
|
|
|
|
estrlcat(subpath, d->d_name, sizeof(subpath));
|
|
|
|
if (statf(subpath, &dst) < 0) {
|
2015-04-19 08:00:47 -04:00
|
|
|
if (!(r->flags & SILENT)) {
|
|
|
|
weprintf("%s %s:", statf_name, subpath);
|
|
|
|
recurse_status = 1;
|
|
|
|
}
|
2015-04-18 16:04:49 -04:00
|
|
|
} else if ((r->flags & SAMEDEV) && dst.st_dev != st.st_dev) {
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
r->depth++;
|
|
|
|
(r->fn)(subpath, &dst, data, r);
|
|
|
|
r->depth--;
|
|
|
|
}
|
2015-03-12 19:25:32 -04:00
|
|
|
}
|
2017-10-01 04:39:27 -04:00
|
|
|
closedir(dp);
|
2013-03-05 15:46:48 -05:00
|
|
|
}
|
2011-05-24 19:24:33 -04:00
|
|
|
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
if (!r->depth) {
|
Audit tar(1), add DIRFIRST-flag to recurse()
I've been wanting to do this for a while now, as tar(1) used to
be one of messiest and cruftiest tools.
First off, before walking through the audit, I'll talk about
what the DIRFIRST-flag for recurse() does.
It basically calls fn() on the first-level-dir before calling
it's subentries. It's necessary here, because else the order
of the tar-files would've been wrong (it would try to create
dir/file before creating dir/).
Now, to the audit:
1) Update manpage, fix mistake that compression is also available
for compressing. It's only available for extracting.
2) Define the major, minor and makedev macros from glibc by ourselves.
No need to rely on them, as they are common sense.
decomp()
3) Simple refactorization.
putoctal()
4) Add a truncation check for snprintf().
archive()
5) BUGFIX: Add checks to any checkable function, don't blindly call
them, this is harmful and there are 100 ways to exploit that.
6) Use estrlcpy() instead of snprintf() wherever possible, fix
alignment.
7) BUGFIX: Terminate the result-buffer of readlink(), check if
it even succeeded.
8) Fix sizeof()-formatting.
unarchive()
9) BUGFIX: Add checks to any checkable function, don't blindly call
them, this is harmful and there are 100 ways to exploit that.
10) BUGFIX: strtoul can happily return negative numbers. Add checks
for that and also if the full string has been processed.
11) Remove calls to perror(). We have eprintf, use it.
12) BUGFIX: "minor = strtoul(h->mode, 0, 8);". We need h->minor of
course.
13) Fix typo "usupported", remove fprintf-call.
print()
14) Check fread().
xt()
15) Get rid of snprintf-magic. Use estrlcat().
16) BUGFIX: check for ferror() on the tarfile.
usage()
17) Update it. The old usage() was like 1000 years old.
main()
18) Add DIRFIRST-flag to the recursor.
19) Don't print usage() when a mode is re-set. We allow this in
general.
20) Add function checks and fix error messages.
21) Add tarfilename-global for proper error-messages.
2015-03-20 20:03:35 -04:00
|
|
|
if (!(r->flags & DIRFIRST))
|
|
|
|
(r->fn)(path, &st, data, r);
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
|
|
|
|
for (; r->hist; ) {
|
|
|
|
h = r->hist;
|
|
|
|
r->hist = r->hist->prev;
|
|
|
|
free(h);
|
|
|
|
}
|
|
|
|
}
|
2011-05-24 19:24:33 -04:00
|
|
|
}
|