sbase/od.c

164 lines
3.3 KiB
C
Raw Normal View History

/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
/* Number of input bytes shown on each output line (see od()). */
static size_t bytes_per_line = 16;
/* Upper bound on the number of bytes dumped, set by -N; -1 means no limit. */
static off_t maxbytes = -1;
/* Number of leading input bytes to discard before dumping, set by -j. */
static off_t skip = 0;
/* Address column format selected by -A: 'd', 'o', 'x', or 'n' for none. */
static unsigned char radix = 'o';
/* Byte output format selected by -t: one of 'a', 'c', 'd', 'o', 'u', 'x'. */
static unsigned char type = 'o';
/*
 * Write the address column that starts an output line to f.
 *
 * When the global radix is 'n' only a single blank is written. Otherwise
 * addr is written with a minimum width of seven, zero-padded, using radix
 * as the printf conversion character, followed by a blank.
 */
static void
printaddress(FILE *f, off_t addr)
{
	/* Template: the '#' placeholder at index 4 is replaced by radix. */
	char fmt[] = "%07j# ";

	if (radix == 'n') {
		fputc(' ', f);
		return;
	}

	fmt[4] = radix;
	fprintf(f, fmt, (intmax_t)addr);
}
/*
 * Write a single input byte c to f, formatted according to the global type:
 *
 *   'a'  the byte with its high bit cleared, shown by name for values
 *        0..32 and 127 ("del"), as the character itself otherwise;
 *   'c'  a backslash escape for NUL, \a, \b, \t, \n, \v, \f and \r,
 *        the character itself otherwise;
 *   else the numeric format looked up by type ('d', 'o', 'u' or 'x').
 *
 * Every field is followed by a single blank.
 */
static void
printchar(FILE *f, unsigned char c)
{
	/* Names of the bytes 0..32, indexed by byte value. */
	static const char *const names[] = {
		"nul", "soh", "stx", "etx", "eot", "enq", "ack",
		"bel", "bs",  "ht",  "nl",  "vt",  "ff",  "cr",
		"so",  "si",  "dle", "dc1", "dc2", "dc3", "dc4",
		"nak", "syn", "etb", "can", "em",  "sub", "esc",
		"fs",  "gs",  "rs",  "us",  "sp",
	};
	/* Escape sequences, indexed by byte value. */
	static const char *const escapes[] = {
		['\0'] = "\\0", ['\a'] = "\\a",
		['\b'] = "\\b", ['\t'] = "\\t",
		['\n'] = "\\n", ['\v'] = "\\v",
		['\f'] = "\\f", ['\r'] = "\\r",
	};
	/* printf formats for the numeric types, indexed by type character. */
	static const char *const formats[] = {
		['d'] = "%4hhd ", ['o'] = "%03hho ",
		['u'] = "%3hhu ", ['x'] = "%02hhx ",
	};

	if (type == 'a') {
		c &= ~128; /* clear high bit as required by standard */
		if (c == 127)
			fprintf(f, "%3s ", "del");
		else if (c < LEN(names))
			fprintf(f, "%3s ", names[c]);
		else
			fprintf(f, "%3c ", c);
		return;
	}

	if (type == 'c') {
		/* The search set ends in an explicit NUL so that c == 0 matches too. */
		if (strchr("\a\b\t\n\v\f\r\0", c))
			fprintf(f, "%3s ", escapes[c]);
		else
			fprintf(f, "%3c ", c);
		return;
	}

	fprintf(f, formats[type], c);
}
/*
 * Dump the bytes read from in to out, bytes_per_line bytes per output line.
 *
 * The first `skip` bytes of the input are read and discarded. If maxbytes
 * is not -1, at most maxbytes bytes are dumped and reading stops as soon as
 * that limit has been reached, so an endless input (e.g. a device or pipe)
 * does not keep the function busy forever. Each output line starts with the
 * input offset of its first byte; unless radix is 'n', a final line holding
 * the end offset is written as well.
 *
 * in_name and out_name are not used by this function.
 */
static void
od(FILE *in, char *in_name, FILE *out, char *out_name)
{
	off_t addr;
	size_t i, chunklen;
	unsigned char buf[BUFSIZ];

	/* Check the byte limit before every read, not only inside a chunk. */
	for (addr = 0; maxbytes == -1 || (addr - skip) < maxbytes; ) {
		if (!(chunklen = fread(buf, 1, BUFSIZ, in)))
			break;
		for (i = 0; i < chunklen && (maxbytes == -1 ||
		     (addr - skip) < maxbytes); ++i, ++addr) {
			/* still inside the prefix that is to be skipped */
			if (addr - skip < 0)
				continue;
			if (((addr - skip) % bytes_per_line) == 0) {
				/* terminate the previous line, if there was one */
				if (addr - skip)
					fputc('\n', out);
				printaddress(out, addr);
			}
			printchar(out, buf[i]);
		}
		if (feof(in) || ferror(in) || ferror(out))
			break;
	}
	/* terminate the last data line if any byte was dumped */
	if (addr - skip > 0)
		fputc('\n', out);
	if (radix != 'n') {
		/* if the input was shorter than skip, report skip as the end offset */
		printaddress(out, MAX(addr, skip));
		fputc('\n', out);
	}
}
static void
usage(void)
{
Implement od(1) v-flag If this flag is not given, od(1) automatically replaces duplicate adjacent lines with an '*' for each reoccurence. If this flag is set, thus, no such filtering occurs. In this case this would mean having to somehow keep the last printed line in some backbuffer, building the next line and then doing the necessary comparisons. This basically means that we duplicate the functionality provided with uniq(1). So instead of $ od -t a > dump you'd rather do $ od -t a | uniq -f 1 -c > dump Skipping the first field is necessary, as the addresses obviously differ. Now, I was thinking hard why this flag even exists. If POSIX mandated to add the address before the asterisk, so we know the offset of duplicate occurrences, this would make sense. However, this is not the case. Using uniq(1) also gives nicer output: ~ $ echo "111111111111111111111111111111111111111111111111" | od -t a -v | uniq -f 1 -c 3 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0000060 nl 1 0000061 in comparison to $ echo "111111111111111111111111111111111111111111111111" | od -t a 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 0000060 nl 0000061 Before working on od(1), I didn't even know it would filter out duplicate adjacent lines like that. This is also a matter of predictability. Concluding, the v-flag is implicitly set and users urged to just use the existing tools provided by the system. I don't think we would break scripts either. Firstly, it's rather unlikely to have duplicate lines exactly matching the line-length of od(1). Secondly, even if a script did that specifically, in the worst case there would be a counting error or something. Given od(1) is mostly used interactively, we can safely assume this feature is for the benefit of the users. Ditch this legacy POSIX crap! Please enter the commit message for your changes. Lines starting
2015-09-30 06:54:24 -04:00
eprintf("usage: %s [-A d|o|x|n] [-t a|c|d|o|u|x] [-v] [file ...]\n", argv0);
}
/*
 * Entry point: parse the options, then dump every file operand to stdout.
 *
 * Options:
 *   -A d|o|x|n          radix of the address column ('n' for none)
 *   -j skip             number of leading input bytes to discard
 *   -N count            maximum number of bytes to dump
 *   -t a|c|d|o|u|x      output format of the bytes
 *   -v                  accepted and ignored (see below)
 *
 * Without file operands stdin is dumped; the operand "-" also means stdin.
 * Returns non-zero if an offset argument was rejected, a file could not be
 * opened, or closing one of the streams reported a failure.
 */
int
main(int argc, char *argv[])
{
	FILE *fp;
	int ret = 0;
	char *s;

	ARGBEGIN {
	case 'A':
		s = EARGF(usage());
		if (strlen(s) != 1 || !strchr("doxn", s[0]))
			usage();
		radix = s[0];
		break;
	case 'j':
		if ((skip = parseoffset(EARGF(usage()))) < 0)
			return 1;
		break;
	case 'N':
		if ((maxbytes = parseoffset(EARGF(usage()))) < 0)
			return 1;
		break;
	case 't':
		s = EARGF(usage());
		if (strlen(s) != 1 || !strchr("acdoux", s[0]))
			usage();
		type = s[0];
		break;
	case 'v':
		/*
		 * Always set: duplicate adjacent lines are never collapsed
		 * into '*'. Use "uniq -f 1 -c" to handle duplicate lines.
		 */
		break;
	default:
		usage();
	} ARGEND;

	if (!argc) {
		od(stdin, "<stdin>", stdout, "<stdout>");
	} else {
		for (; *argv; argc--, argv++) {
			if (!strcmp(*argv, "-")) {
				*argv = "<stdin>";
				fp = stdin;
			} else if (!(fp = fopen(*argv, "r"))) {
				weprintf("fopen %s:", *argv);
				ret = 1;
				continue;
			}
			od(fp, *argv, stdout, "<stdout>");
			/* stdin is closed once, together with stdout/stderr below */
			if (fp != stdin && fshut(fp, *argv))
				ret = 1;
		}
	}

	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
	       fshut(stderr, "<stderr>");

	return ret;
}