Add UTF-8-support to strings(1), add t-flag and refactor code

Previously, the string-length was limited to BUFSIZ, which is an
obvious deficiency.
Now the buffer only needs to be as long as the minimal string length
specified by the user.
I added UTF-8-support, because that's how POSIX wants it and there
are cases where you need this. It doesn't add ELF-barf compared to
the previous implementation.
The t-flag is also pretty important for POSIX-compliance, so I added
it.
The only trouble previously was the a-flag, but given that POSIX
leaves undefined what the a-flag actually does, we set it as default
and don't care about parsing ELF-headers, which has already
turned out to be a security issue in GNU binutils[0].

[0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html
This commit is contained in:
FRIGN 2015-02-17 17:04:36 +01:00
parent 949dafc171
commit e5b5497773
3 changed files with 80 additions and 35 deletions

2
README
View File

@ -67,7 +67,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
sort no -m, -o, -d, -f, -i sort no -m, -o, -d, -f, -i
=* split yes none =* split yes none
=* sponge non-posix none =* sponge non-posix none
strings no -t #* strings yes none
=* sync non-posix none =* sync non-posix none
=* tail yes none =* tail yes none
=* tar non-posix none =* tar non-posix none

View File

@ -1,32 +1,52 @@
.Dd November 23, 2014 .Dd February 17, 2015
.Dt STRINGS 1 .Dt STRINGS 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
.Nm strings .Nm strings
.Nd print the strings of printable characters in files .Nd print strings of printable characters in files
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl a .Op Fl a
.Op Fl n Ar len .Op Fl n Ar num
.Op Fl t Ar format
.Op Ar file ... .Op Ar file ...
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
prints the printable character sequences that are at least 4 characters writes sequences of at least 4 printable characters in each
long. If no .Ar file
.Ar files to stdout.
are given, If no
.Ar file
is given,
.Nm .Nm
reads from stdin. reads from stdin.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl a .It Fl a
Scan files in their entirety. This is the default. Scan each
.It Fl n Ar len .Ar file
Only print sequences that are at least entirely. This is the default.
.Ar len .It Fl n Ar num
characters. The default is 4 characters. Print sequences of at least
.Ar num
characters. The default is 4.
.It Fl t Ar format
Prepend each string with its byte offset, with
.Ar format
being one of
.Sy d , o , x
for decimal, octal or hexadecimal numbers.
.El .El
.Sh STANDARDS .Sh STANDARDS
The
.Nm .Nm
mirrors the semantics of Plan9 utility is compliant with the
.Xr strings 1 . .St -p1003.1-2008
specification.
.Pp
The
.Op Fl t
output format has been changed from "%F %s" to "%8lF: %s", with
.Sy F
being one of
.Sy d , o , x .

View File

@ -1,50 +1,75 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <limits.h> #include <limits.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "utf.h"
#include "util.h" #include "util.h"
static void static char *format = "";
strings(FILE *fp, const char *fname, int len)
{
unsigned char buf[BUFSIZ];
int c, i = 0;
off_t offset = 0;
do { static void
offset++; strings(FILE *fp, const char *fname, size_t len)
if (isprint(c = getc(fp))) {
buf[i++] = c; Rune r, *rbuf;
if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) { size_t i, bread;
buf[i] = '\0'; off_t off;
printf("%8ld: %s\n", (long)offset - i - 1, buf);
rbuf = emalloc(len * sizeof(*rbuf));
for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) {
off += bread;
if (r == Runeerror)
continue;
else if (!isprintrune(r)) {
if (i > len)
putchar('\n');
i = 0; i = 0;
continue;
} }
} while (c != EOF); if (i < len) {
if (ferror(fp)) rbuf[i++] = r;
eprintf("%s: read error:", fname); continue;
} else if (i > len) {
efputrune(&r, stdout, "<stdout>");
continue;
}
printf(format, (long)off - i);
for (i = 0; i < len; i++) {
efputrune(rbuf + i, stdout, "<stdout>");
}
i++;
}
free(rbuf);
} }
static void static void
usage(void) usage(void)
{ {
eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0); eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0);
} }
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
FILE *fp; FILE *fp;
size_t len = 4;
int ret = 0; int ret = 0;
int len = 4; char f;
ARGBEGIN { ARGBEGIN {
case 'a': case 'a':
break; break;
case 'n': case 'n':
len = estrtonum(EARGF(usage()), 1, INT_MAX); len = estrtonum(EARGF(usage()), 1, LLONG_MAX);
break;
case 't':
format = estrdup("%8l#: ");
f = *EARGF(usage());
if (f == 'd' || f == 'o' || f == 'x')
format[3] = f;
else
usage();
break; break;
default: default:
usage(); usage();