sbase/grep.c

291 lines
5.3 KiB
C
Raw Normal View History

2011-05-22 21:36:34 -04:00
/* See LICENSE file for copyright and license details. */
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
2014-11-16 07:37:43 -05:00
#include "queue.h"
2011-06-18 01:42:24 -04:00
#include "util.h"
/* Result codes: returned by grep() per file and by main() as exit status. */
enum {
	Match   = 0,
	NoMatch = 1,
	Error   = 2
};
2011-05-22 21:36:34 -04:00
/* Forward declarations of the file-local helpers defined further down. */
static void addpattern(const char *pattern, size_t patlen);
static void addpatternfile(FILE *fp);
static int  grep(FILE *fp, const char *str);
2011-05-22 21:36:34 -04:00
/* Option state; filled in by main() while parsing the command line. */
static int Eflag; /* -E: patterns are extended regular expressions */
static int Fflag; /* -F: patterns are fixed strings, no regex is compiled */
static int Hflag; /* -H: prefix output with the file name */
static int eflag; /* a pattern was supplied with -e */
static int fflag; /* a pattern file was supplied with -f */
static int hflag; /* -h: never prefix output with the file name */
static int iflag; /* -i: case-insensitive matching */
static int sflag; /* -s: no warning for files that cannot be opened */
static int vflag; /* -v: select the lines that do not match */
static int wflag; /* -w: wrap regex patterns in word-boundary anchors */
static int xflag; /* -x: pattern has to match the whole line */
static int many;  /* set when more than one file operand is given */
static int mode;  /* output mode: 'c', 'l', 'n', 'q', or 0 for plain lines */
2011-05-22 21:36:34 -04:00
2014-11-16 07:37:43 -05:00
struct pattern {
2013-09-27 11:26:22 -04:00
char *pattern;
regex_t preg;
SLIST_ENTRY(pattern) entry;
2014-11-16 07:37:43 -05:00
};
static SLIST_HEAD(phead, pattern) phead;
2013-09-27 11:26:22 -04:00
static void
addpattern(const char *pattern, size_t patlen)
2013-09-27 11:26:22 -04:00
{
2014-11-16 07:37:43 -05:00
struct pattern *pnode;
2014-11-20 09:47:26 -05:00
char *tmp;
int bol, eol;
size_t len;
2013-09-27 11:26:22 -04:00
if (!patlen)
return;
/* a null BRE/ERE matches every line */
if (!Fflag)
if (pattern[0] == '\0')
2016-01-20 12:26:47 -05:00
pattern = "^";
2014-11-20 09:47:26 -05:00
if (!Fflag && xflag) {
tmp = enmalloc(Error, patlen + 3);
snprintf(tmp, patlen + 3, "%s%s%s",
2014-11-20 09:47:26 -05:00
pattern[0] == '^' ? "" : "^",
pattern,
pattern[patlen - 1] == '$' ? "" : "$");
} else if (!Fflag && wflag) {
len = patlen + 5 + (Eflag ? 2 : 4);
2015-02-10 20:08:17 -05:00
tmp = enmalloc(Error, len);
bol = eol = 0;
if (pattern[0] == '^')
bol = 1;
if (pattern[patlen - 1] == '$')
eol = 1;
snprintf(tmp, len, "%s\\<%s%.*s%s\\>%s",
bol ? "^" : "",
Eflag ? "(" : "\\(",
(int)patlen - bol - eol, pattern + bol,
Eflag ? ")" : "\\)",
eol ? "$" : "");
2014-11-20 09:47:26 -05:00
} else {
2015-02-10 20:08:17 -05:00
tmp = enstrdup(Error, pattern);
}
2015-02-10 20:08:17 -05:00
pnode = enmalloc(Error, sizeof(*pnode));
pnode->pattern = tmp;
SLIST_INSERT_HEAD(&phead, pnode, entry);
2013-09-27 11:26:22 -04:00
}
2014-11-20 11:57:49 -05:00
static void
addpatternfile(FILE *fp)
2014-11-20 11:57:49 -05:00
{
2014-12-16 15:20:41 -05:00
static char *buf = NULL;
static size_t size = 0;
2015-01-31 09:19:42 -05:00
ssize_t len = 0;
2014-11-20 11:57:49 -05:00
while ((len = getline(&buf, &size, fp)) > 0) {
2015-01-31 09:19:42 -05:00
if (len > 0 && buf[len - 1] == '\n')
2014-11-20 11:57:49 -05:00
buf[len - 1] = '\0';
addpattern(buf, (size_t)len);
2014-11-20 11:57:49 -05:00
}
2014-11-21 06:43:53 -05:00
if (ferror(fp))
enprintf(Error, "read error:");
2014-11-20 11:57:49 -05:00
}
static int
grep(FILE *fp, const char *str)
2011-05-22 21:36:34 -04:00
{
2014-12-16 15:20:41 -05:00
static char *buf = NULL;
static size_t size = 0;
2015-01-31 09:19:42 -05:00
ssize_t len = 0;
long c = 0, n;
2014-11-16 07:37:43 -05:00
struct pattern *pnode;
int match, result = NoMatch;
2011-05-22 21:36:34 -04:00
for (n = 1; (len = getline(&buf, &size, fp)) > 0; n++) {
/* Remove the trailing newline if one is present. */
if (len && buf[len - 1] == '\n')
buf[len - 1] = '\0';
match = 0;
SLIST_FOREACH(pnode, &phead, entry) {
if (Fflag) {
if (xflag) {
if (!(iflag ? strcasecmp : strcmp)(buf, pnode->pattern)) {
match = 1;
break;
}
2014-11-20 12:38:31 -05:00
} else {
if ((iflag ? strcasestr : strstr)(buf, pnode->pattern)) {
match = 1;
break;
}
2014-11-20 12:38:31 -05:00
}
} else {
if (regexec(&pnode->preg, buf, 0, NULL, 0) == 0) {
match = 1;
break;
}
2014-11-20 09:35:23 -05:00
}
}
if (match != vflag) {
2019-05-20 12:21:08 -04:00
result = Match;
switch (mode) {
2013-09-27 11:26:22 -04:00
case 'c':
c++;
break;
case 'l':
puts(str);
goto end;
case 'q':
exit(Match);
default:
2014-11-16 14:03:25 -05:00
if (!hflag && (many || Hflag))
2013-09-27 11:26:22 -04:00
printf("%s:", str);
if (mode == 'n')
2013-09-27 11:26:22 -04:00
printf("%ld:", n);
puts(buf);
2013-09-27 11:26:22 -04:00
break;
}
2011-05-22 21:36:34 -04:00
}
}
if (mode == 'c')
2011-05-25 15:40:47 -04:00
printf("%ld\n", c);
end:
if (ferror(fp)) {
weprintf("%s: read error:", str);
result = Error;
}
return result;
2011-05-22 21:36:34 -04:00
}
static void
usage(void)
{
enprintf(Error, "usage: %s [-EFHchilnqsvwx] [-e pattern] [-f file] "
"[pattern] [file ...]\n", argv0);
}
int
main(int argc, char *argv[])
{
struct pattern *pnode;
2015-05-15 07:37:33 -04:00
int m, flags = REG_NOSUB, match = NoMatch;
FILE *fp;
char *arg;
SLIST_INIT(&phead);
ARGBEGIN {
case 'E':
Eflag = 1;
Fflag = 0;
flags |= REG_EXTENDED;
break;
case 'F':
Fflag = 1;
Eflag = 0;
flags &= ~REG_EXTENDED;
break;
case 'H':
Hflag = 1;
hflag = 0;
break;
case 'e':
arg = EARGF(usage());
2015-03-27 17:47:15 -04:00
if (!(fp = fmemopen(arg, strlen(arg) + 1, "r")))
eprintf("fmemopen:");
addpatternfile(fp);
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
efshut(fp, arg);
eflag = 1;
break;
case 'f':
arg = EARGF(usage());
fp = fopen(arg, "r");
if (!fp)
enprintf(Error, "fopen %s:", arg);
addpatternfile(fp);
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
efshut(fp, arg);
fflag = 1;
break;
case 'h':
hflag = 1;
Hflag = 0;
break;
case 'c':
case 'l':
case 'n':
case 'q':
mode = ARGC();
break;
case 'i':
flags |= REG_ICASE;
iflag = 1;
break;
case 's':
sflag = 1;
break;
case 'v':
vflag = 1;
break;
case 'w':
wflag = 1;
break;
case 'x':
xflag = 1;
break;
default:
usage();
} ARGEND
if (argc == 0 && !eflag && !fflag)
usage(); /* no pattern */
/* just add literal pattern to list */
if (!eflag && !fflag) {
2015-03-27 17:47:15 -04:00
if (!(fp = fmemopen(argv[0], strlen(argv[0]) + 1, "r")))
eprintf("fmemopen:");
addpatternfile(fp);
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
efshut(fp, argv[0]);
argc--;
argv++;
}
if (!Fflag)
/* Compile regex for all search patterns */
SLIST_FOREACH(pnode, &phead, entry)
enregcomp(Error, &pnode->preg, pnode->pattern, flags);
many = (argc > 1);
if (argc == 0) {
match = grep(stdin, "<stdin>");
} else {
for (; *argv; argc--, argv++) {
if (!strcmp(*argv, "-")) {
*argv = "<stdin>";
fp = stdin;
} else if (!(fp = fopen(*argv, "r"))) {
if (!sflag)
weprintf("fopen %s:", *argv);
match = Error;
continue;
}
m = grep(fp, *argv);
if (m == Error || (match != Error && m == Match))
match = m;
if (fp != stdin && fshut(fp, *argv))
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
match = Error;
}
}
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"))
match = Error;
return match;
}