tail: Process bytes with -c option, and add -m option for runes

POSIX says that -c specifies a number of bytes, not characters. This
flag is commonly used by scripts that operate on binary files to do things
like extract a header. Treating the offsets as character offsets will
break things in mysterious ways.

Instead, add a -m option (chosen to match `wc -m`, which also operates
on characters) to handle character offsets.
This commit is contained in:
Michael Forney 2017-07-03 14:58:52 -07:00 committed by Anselm R Garbe
parent 1ab4ac60ff
commit ea8622a4ce
2 changed files with 27 additions and 8 deletions

6
tail.1
View File

@ -7,7 +7,7 @@
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl f .Op Fl f
.Op Fl c Ar num | Fl n Ar num | Fl Ns Ar num .Op Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num
.Op Ar file ... .Op Ar file ...
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
@ -20,10 +20,10 @@ is given,
reads from stdin. reads from stdin.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl c Ar num | Fl n Ar num | Fl Ns Ar num .It Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num
Display final Display final
.Ar num .Ar num
characters | lines | bytes | characters | lines |
lines. If lines. If
.Ar num .Ar num
begins with '+' begins with '+'

29
tail.c
View File

@ -26,12 +26,23 @@ dropinit(int fd, const char *fname, size_t count)
goto copy; goto copy;
count--; /* numbering starts at 1 */ count--; /* numbering starts at 1 */
while (count && (n = read(fd, buf, sizeof(buf))) > 0) { while (count && (n = read(fd, buf, sizeof(buf))) > 0) {
if (mode == 'n') { switch (mode) {
case 'n': /* lines */
for (p = buf; count && n > 0; p++, n--) { for (p = buf; count && n > 0; p++, n--) {
if (*p == '\n') if (*p == '\n')
count--; count--;
} }
} else { break;
case 'c': /* bytes */
if (count > n) {
count -= n;
} else {
p = buf + count;
n -= count;
count = 0;
}
break;
case 'm': /* runes */
for (p = buf; count && n > 0; p += nr, n -= nr, count--) { for (p = buf; count && n > 0; p += nr, n -= nr, count--) {
nr = charntorune(&r, p, n); nr = charntorune(&r, p, n);
if (!nr) { if (!nr) {
@ -42,6 +53,7 @@ dropinit(int fd, const char *fname, size_t count)
break; break;
} }
} }
break;
} }
} }
if (count) { if (count) {
@ -90,7 +102,8 @@ taketail(int fd, const char *fname, size_t count)
if (n == 0) if (n == 0)
break; break;
len += n; len += n;
if (mode == 'n') { switch (mode) {
case 'n': /* lines */
/* ignore the last character; if it is a newline, it /* ignore the last character; if it is a newline, it
* ends the last line */ * ends the last line */
for (p = buf + len - 2, left = count; p >= buf; p--) { for (p = buf + len - 2, left = count; p >= buf; p--) {
@ -102,7 +115,11 @@ taketail(int fd, const char *fname, size_t count)
break; break;
} }
} }
} else { break;
case 'c': /* bytes */
p = count < len ? buf + len - count : buf;
break;
case 'm': /* runes */
for (p = buf + len - 1, left = count; p >= buf; p--) { for (p = buf + len - 1, left = count; p >= buf; p--) {
/* skip utf-8 continuation bytes */ /* skip utf-8 continuation bytes */
if ((*p & 0xc0) == 0x80) if ((*p & 0xc0) == 0x80)
@ -111,6 +128,7 @@ taketail(int fd, const char *fname, size_t count)
if (!left) if (!left)
break; break;
} }
break;
} }
if (p > buf) { if (p > buf) {
len -= p - buf; len -= p - buf;
@ -125,7 +143,7 @@ taketail(int fd, const char *fname, size_t count)
static void static void
usage(void) usage(void)
{ {
eprintf("usage: %s [-f] [-c num | -n num | -num] [file ...]\n", argv0); eprintf("usage: %s [-f] [-c num | -m num | -n num | -num] [file ...]\n", argv0);
} }
int int
@ -143,6 +161,7 @@ main(int argc, char *argv[])
fflag = 1; fflag = 1;
break; break;
case 'c': case 'c':
case 'm':
case 'n': case 'n':
mode = ARGC(); mode = ARGC();
numstr = EARGF(usage()); numstr = EARGF(usage());