2013-10-16 11:58:52 -04:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
2015-02-14 15:02:41 -05:00
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
2015-03-11 18:21:52 -04:00
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
2015-04-29 17:19:36 -04:00
|
|
|
#include <stdint.h>
|
2013-10-16 11:58:52 -04:00
|
|
|
#include <stdlib.h>
|
2015-03-11 18:21:52 -04:00
|
|
|
#include <stdio.h>
|
2015-04-29 07:12:18 -04:00
|
|
|
#include <unistd.h>
|
2014-11-13 12:29:30 -05:00
|
|
|
|
2015-03-12 19:25:32 -04:00
|
|
|
#include "fs.h"
|
2013-10-16 11:58:52 -04:00
|
|
|
#include "util.h"
|
|
|
|
|
2015-03-11 18:21:52 -04:00
|
|
|
/* Deepest recursion level at which du() still prints an entry (-d). */
static size_t maxdepth = SIZE_MAX;
/* Size in bytes of one reported unit (BLOCKSIZE environment variable, -k). */
static size_t blksize = 512;

/* Command-line switches; objects with static storage start out as zero. */
static int aflag;
static int sflag;
static int hflag;
|
2014-01-23 16:16:05 -05:00
|
|
|
|
2013-10-16 11:58:52 -04:00
|
|
|
static void
|
2015-04-29 07:12:18 -04:00
|
|
|
printpath(off_t n, const char *path)
|
2013-10-16 11:58:52 -04:00
|
|
|
{
|
2014-10-16 05:06:27 -04:00
|
|
|
if (hflag)
|
2014-10-18 17:25:00 -04:00
|
|
|
printf("%s\t%s\n", humansize(n * blksize), path);
|
2014-10-16 05:06:27 -04:00
|
|
|
else
|
2015-04-28 05:25:13 -04:00
|
|
|
printf("%ju\t%s\n", n, path);
|
2013-10-16 11:58:52 -04:00
|
|
|
}
|
|
|
|
|
2015-04-29 07:12:18 -04:00
|
|
|
static off_t
|
2015-03-11 18:21:52 -04:00
|
|
|
nblks(blkcnt_t blocks)
|
2013-10-18 11:40:31 -04:00
|
|
|
{
|
2015-03-11 18:21:52 -04:00
|
|
|
return (512 * blocks + blksize - 1) / blksize;
|
2013-10-18 11:40:31 -04:00
|
|
|
}
|
|
|
|
|
2015-04-24 10:42:48 -04:00
|
|
|
static void
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
du(const char *path, struct stat *st, void *total, struct recursor *r)
|
2013-10-16 11:58:52 -04:00
|
|
|
{
|
2015-04-29 07:12:18 -04:00
|
|
|
off_t subtotal = 0;
|
2015-03-11 18:21:52 -04:00
|
|
|
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
if (st && S_ISDIR(st->st_mode))
|
2015-03-12 19:25:32 -04:00
|
|
|
recurse(path, &subtotal, r);
|
2015-04-29 07:12:18 -04:00
|
|
|
*((off_t *)total) += subtotal + nblks(st ? st->st_blocks : 0);
|
2013-10-16 11:58:52 -04:00
|
|
|
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
if (!sflag && r->depth <= maxdepth && r->depth && st && (S_ISDIR(st->st_mode) || aflag))
|
|
|
|
printpath(subtotal + nblks(st->st_blocks), path);
|
2013-10-16 11:58:52 -04:00
|
|
|
}
|
2015-01-31 19:24:03 -05:00
|
|
|
|
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
2015-02-18 12:24:21 -05:00
|
|
|
eprintf("usage: %s [-a | -s] [-d depth] [-h] [-k] [-H | -L | -P] [-x] [file ...]\n", argv0);
|
2015-01-31 19:24:03 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2015-04-18 16:04:49 -04:00
|
|
|
struct recursor r = { .fn = du, .hist = NULL, .depth = 0, .maxdepth = 0,
|
|
|
|
.follow = 'P', .flags = 0 };
|
2015-04-29 07:12:18 -04:00
|
|
|
off_t n = 0;
|
2015-03-12 19:25:32 -04:00
|
|
|
int kflag = 0, dflag = 0;
|
2015-01-31 19:24:03 -05:00
|
|
|
char *bsize;
|
|
|
|
|
|
|
|
ARGBEGIN {
|
|
|
|
case 'a':
|
|
|
|
aflag = 1;
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
dflag = 1;
|
2015-03-11 18:21:52 -04:00
|
|
|
maxdepth = estrtonum(EARGF(usage()), 0, MIN(LLONG_MAX, SIZE_MAX));
|
2015-01-31 19:24:03 -05:00
|
|
|
break;
|
2015-03-11 18:21:52 -04:00
|
|
|
case 'h':
|
|
|
|
hflag = 1;
|
2015-01-31 19:24:03 -05:00
|
|
|
break;
|
|
|
|
case 'k':
|
|
|
|
kflag = 1;
|
|
|
|
break;
|
2015-03-11 18:21:52 -04:00
|
|
|
case 's':
|
|
|
|
sflag = 1;
|
|
|
|
break;
|
|
|
|
case 'x':
|
2015-03-12 19:25:32 -04:00
|
|
|
r.flags |= SAMEDEV;
|
2015-01-31 19:24:03 -05:00
|
|
|
break;
|
2015-02-09 17:18:49 -05:00
|
|
|
case 'H':
|
|
|
|
case 'L':
|
2015-02-17 11:07:10 -05:00
|
|
|
case 'P':
|
2015-03-12 19:25:32 -04:00
|
|
|
r.follow = ARGC();
|
2015-02-18 12:24:21 -05:00
|
|
|
break;
|
2015-01-31 19:24:03 -05:00
|
|
|
default:
|
|
|
|
usage();
|
|
|
|
} ARGEND;
|
|
|
|
|
|
|
|
if ((aflag && sflag) || (dflag && sflag))
|
|
|
|
usage();
|
|
|
|
|
|
|
|
bsize = getenv("BLOCKSIZE");
|
|
|
|
if (bsize)
|
2015-03-30 13:58:06 -04:00
|
|
|
blksize = estrtonum(bsize, 1, MIN(LLONG_MAX, SIZE_MAX));
|
2015-01-31 19:24:03 -05:00
|
|
|
if (kflag)
|
|
|
|
blksize = 1024;
|
|
|
|
|
2015-03-11 18:21:52 -04:00
|
|
|
if (!argc) {
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
recurse(".", &n, &r);
|
2015-04-05 17:28:49 -04:00
|
|
|
printpath(n, ".");
|
2015-01-31 19:24:03 -05:00
|
|
|
} else {
|
2015-03-11 18:21:52 -04:00
|
|
|
for (; *argv; argc--, argv++) {
|
2015-04-05 17:49:00 -04:00
|
|
|
n = 0;
|
Refactor recurse() again
Okay, why yet another recurse()-refactor?
The last one added the recursor-struct, which simplified things
on the user-end, but there was still one thing that bugged me a lot:
Previously, all fn()'s were forced to (l)stat the paths themselves.
This does not work well when you try to keep up with H-, L- and P-
flags at the same time, as each utility-function would have to set
the right function-pointer for (l)stat every single time.
This is not desirable. Furthermore, recurse should be easy to use
and not involve trouble finding the right (l)stat-function to do it
right.
So, what we needed was a stat-argument for each fn(), so it is
directly accessible. This was impossible to do though when the
fn()'s are still directly called by the programs to "start" the
recurse.
Thus, the fundamental change is to make recurse() the function to
go, while designing the fn()'s in a way they can "live" with st
being NULL (we don't want a null-pointer-deref).
What you can see in this commit is the result of this work. Why
all this trouble instead of using nftw?
The special thing about recurse() is that you tell the function
when to recurse() in your fn(). You don't need special flags to
tell nftw() to skip the subtree, just to give an example.
The only single downside to this is that now, you are not allowed
to unconditionally call recurse() from your fn(). It has to be
a directory.
However, that is a cost I think is easily weighed up by the
advantages.
Another thing is the history: I added a procedure at the end of
the outmost recurse to free the history. This way we don't leak
memory.
A simple optimization on the side:
- if (h->dev == st.st_dev && h->ino == st.st_ino)
+ if (h->ino == st.st_ino && h->dev == st.st_dev)
First compare the likely difference in inode-numbers instead of
checking the unlikely condition that the device-numbers are
different.
2015-03-18 19:53:42 -04:00
|
|
|
recurse(*argv, &n, &r);
|
|
|
|
printpath(n, *argv);
|
2015-01-31 19:24:03 -05:00
|
|
|
}
|
|
|
|
}
|
2015-03-11 18:21:52 -04:00
|
|
|
|
Add *fshut() functions to properly flush file streams
This has been a known issue for a long time. Example:
printf "word" > /dev/full
wouldn't report there's not enough space on the device.
This is due to the fact that every libc has internal buffers
for stdout which store fragments of written data until they reach
a certain size or on some callback to flush them all at once to the
kernel.
You can force the libc to flush them with fflush(). In case flushing
fails, you can check the return value of fflush() and report an error.
However, previously, sbase didn't have such checks and without fflush(),
the libc silently flushes the buffers on exit without checking the errors.
No offense, but there's no way for the libc to report errors in the exit-
condition.
GNU coreutils solve this by having onexit-callbacks to handle the flushing
and report issues, but they have obvious deficiencies.
After long discussions on IRC, we came to the conclusion that checking the
return value of every io-function would be a bit too much, and having a
general-purpose fclose-wrapper would be the best way to go.
It turned out that fclose() alone is not enough to detect errors. The right
way to do it is to fflush() + check ferror on the fp and then to a fclose().
This is what fshut does and that's how it's done before each return.
The return value is obviously affected, reporting an error in case a flush
or close failed, but also when reading failed for some reason, the error-
state is caught.
the !!( ... + ...) construction is used to call all functions inside the
brackets and not "terminating" on the first.
We want errors to be reported, but there's no reason to stop flushing buffers
when one other file buffer has issues.
Obviously, functionales come before the flush and ret-logic comes after to
prevent early exits as well without reporting warnings if there are any.
One more advantage of fshut() is that it is even able to report errors
on obscure NFS-setups which the other coreutils are unable to detect,
because they only check the return-value of fflush() and fclose(),
not ferror() as well.
2015-04-04 15:25:17 -04:00
|
|
|
return fshut(stdout, "<stdout>") || recurse_status;
|
2015-01-31 19:24:03 -05:00
|
|
|
}
|