sbase/od.c

/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

/* Number of input bytes dumped on each output line. */
static size_t bytes_per_line = 16;
/* Maximum number of bytes to dump (set by -N); -1 means no limit. */
static off_t maxbytes = -1;
/* Number of leading input bytes to skip before dumping (set by -j). */
static off_t skip = 0;
/* Address radix (set by -A): 'd', 'o', 'x', or 'n' for no numeric address. */
static unsigned char radix = 'o';
/* Output type (set by -t): one of 'a', 'c', 'd', 'o', 'u', 'x'. */
static unsigned char type = 'o';

/*
 * Write the offset column for addr to f.
 *
 * With radix 'n' only a single blank is written. Otherwise the offset
 * is written zero-padded to at least seven digits, using the radix
 * character itself as the printf conversion, followed by one space.
 */
static void
printaddress(FILE *f, off_t addr)
{
	/* "%07j<radix> ": the conversion character is the radix letter */
	char fmt[] = { '%', '0', '7', 'j', radix, ' ', '\0' };

	if (radix == 'n') {
		fputc(' ', f);
		return;
	}

	fprintf(f, fmt, (intmax_t)addr);
}

/*
 * Write one input byte c to f, formatted according to the global
 * output type, followed by a single space.
 *
 *   'a'   named character: the high bit is cleared first; bytes 0..32
 *         and 127 are written by name ("nul" ... "sp", "del"), every
 *         other byte is written literally.
 *   'c'   character: \0 \a \b \t \n \v \f \r are written as backslash
 *         escapes, other non-printable bytes as a three-digit octal
 *         number, printable bytes literally.
 *   other numeric: the byte is written with the fmtdict entry for the
 *         type ('d', 'o', 'u' or 'x'; main() only accepts these).
 */
static void
printchar(FILE *f, unsigned char c)
{
	/* tables are read-only, so build them once instead of per call */
	static const char *const namedict[] = {
		"nul", "soh", "stx", "etx", "eot", "enq", "ack",
		"bel", "bs",  "ht",  "nl",  "vt",  "ff",  "cr",
		"so",  "si",  "dle", "dc1", "dc2", "dc3", "dc4",
		"nak", "syn", "etb", "can", "em",  "sub", "esc",
		"fs",  "gs",  "rs",  "us",  "sp",
	};
	static const char *const escdict[] = {
		['\0'] = "\\0", ['\a'] = "\\a",
		['\b'] = "\\b", ['\t'] = "\\t",
		['\n'] = "\\n", ['\v'] = "\\v",
		['\f'] = "\\f", ['\r'] = "\\r",
	};
	static const char *const fmtdict[] = {
		['d'] = "%4hhd ", ['o'] = "%03hho ",
		['u'] = "%3hhu ", ['x'] = "%02hhx ",
	};

	switch (type) {
	case 'a':
		c &= ~128; /* clear high bit as required by standard */
		if (c < LEN(namedict) || c == 127) {
			fprintf(f, "%3s ", (c == 127) ? "del" : namedict[c]);
		} else {
			fprintf(f, "%3c ", c);
		}
		break;
	case 'c':
		/* strchr() also matches the terminating NUL, covering c == 0 */
		if (strchr("\a\b\t\n\v\f\r\0", c)) {
			fprintf(f, "%3s ", escdict[c]);
		} else if (!isprint(c)) {
			/* never emit raw control or high-bit bytes */
			fprintf(f, "%03o ", (unsigned)c);
		} else {
			fprintf(f, "%3c ", c);
		}
		break;
	default:
		fprintf(f, fmtdict[type], c);
	}
}

/*
 * Dump the bytes read from in to out.
 *
 * The first `skip` bytes of the input are read and discarded. After
 * that, at most `maxbytes` bytes are dumped (no limit when maxbytes is
 * -1), bytes_per_line bytes per output line, each line starting with
 * the address of its first byte. A final line holding the end address
 * is written unless the radix is 'n'.
 *
 * in_name and out_name are accepted for the caller's convenience but
 * are not used by this function.
 */
static void
od(FILE *in, char *in_name, FILE *out, char *out_name)
{
	off_t addr;
	size_t i, chunklen;
	unsigned char buf[BUFSIZ];

	for (addr = 0; ; ) {
		/*
		 * Stop reading as soon as the byte limit has been reached.
		 * Without this check the input would be consumed up to EOF,
		 * which never happens on endless inputs such as /dev/zero.
		 */
		if (maxbytes != -1 && (addr - skip) >= maxbytes)
			break;
		if (!(chunklen = fread(buf, 1, BUFSIZ, in)))
			break;
		for (i = 0; i < chunklen && (maxbytes == -1 ||
		     (addr - skip) < maxbytes); ++i, ++addr) {
			/* still inside the skipped prefix */
			if (addr - skip < 0)
				continue;
			if (((addr - skip) % bytes_per_line) == 0) {
				/* terminate the previous line, if there was one */
				if (addr - skip)
					fputc('\n', out);
				printaddress(out, addr);
			}
			printchar(out, buf[i]);
		}
		if (feof(in) || ferror(in) || ferror(out))
			break;
	}
	/* terminate the last data line, if any data was printed */
	if (addr - skip > 0)
		fputc('\n', out);
	if (radix != 'n') {
		/* if the input was shorter than skip, report skip itself */
		printaddress(out, MAX(addr, skip));
		fputc('\n', out);
	}
}

/*
 * Report the command-line synopsis through eprintf().
 *
 * NOTE(review): callers continue as if this never returns, so eprintf()
 * (from util.h) presumably terminates the program; confirm there.
 */
static void
usage(void)
{
	eprintf("usage: %s [-A d|o|x|n] [-j skip] [-N count] "
	        "[-t a|c|d|o|u|x] [-v] [file ...]\n", argv0);
}

/*
 * Parse the command-line flags into the file-scope settings, then dump
 * standard input (when no operands remain) or each operand in turn; the
 * operand "-" selects standard input.
 *
 * Returns 0 on success and non-zero if an offset could not be parsed,
 * a file could not be opened, or any stream failed to shut down cleanly.
 */
int
main(int argc, char *argv[])
{
	FILE *stream;
	char *arg;
	int status = 0;

	ARGBEGIN {
	case 'A':
		arg = EARGF(usage());
		/* exactly one character, and it must be a known radix */
		if (arg[0] == '\0' || arg[1] != '\0' || !strchr("doxn", arg[0]))
			usage();
		radix = arg[0];
		break;
	case 'j':
		skip = parseoffset(EARGF(usage()));
		if (skip < 0)
			return 1;
		break;
	case 'N':
		maxbytes = parseoffset(EARGF(usage()));
		if (maxbytes < 0)
			return 1;
		break;
	case 't':
		arg = EARGF(usage());
		/* exactly one character, and it must be a known type */
		if (arg[0] == '\0' || arg[1] != '\0' || !strchr("acdoux", arg[0]))
			usage();
		type = arg[0];
		break;
	case 'v':
		/* Accepted but a no-op: duplicate lines are never collapsed. */
		break;
	default:
		usage();
	} ARGEND;

	if (argc == 0) {
		od(stdin, "<stdin>", stdout, "<stdout>");
	} else {
		while (*argv) {
			if (strcmp(*argv, "-") == 0) {
				*argv = "<stdin>";
				stream = stdin;
			} else {
				stream = fopen(*argv, "r");
			}

			if (!stream) {
				weprintf("fopen %s:", *argv);
				status = 1;
			} else {
				od(stream, *argv, stdout, "<stdout>");
				/* stdin is shut down once, after all operands */
				if (stream != stdin && fshut(stream, *argv))
					status = 1;
			}

			argc--;
			argv++;
		}
	}

	status |= fshut(stdin, "<stdin>");
	status |= fshut(stdout, "<stdout>");
	status |= fshut(stderr, "<stderr>");

	return status;
}
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`/* See LICENSE file for copyright and license details. */`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`#include <ctype.h>`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`#include <inttypes.h>`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`#include <stdlib.h>`
			`#include <string.h>`

			`#include "util.h"`

Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`static size_t bytes_per_line = 16;`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`static off_t maxbytes = -1;`
			`static off_t skip = 0;`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`static unsigned char radix = 'o';`
			`static unsigned char type = 'o';`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00
			`static void`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`printaddress(FILE *f, off_t addr)`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`{`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`char fmt[] = "%07j# ";`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`if (radix == 'n') {`
			`fputc(' ', f);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`} else {`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`fmt[4] = radix;`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`fprintf(f, fmt, (intmax_t)addr);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`}`
			`}`

			`static void`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`printchar(FILE *f, unsigned char c)`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`{`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`const char *namedict[] = {`
			`"nul", "soh", "stx", "etx", "eot", "enq", "ack",`
			`"bel", "bs", "ht", "nl", "vt", "ff", "cr",`
			`"so", "si", "dle", "dc1", "dc2", "dc3", "dc4",`
			`"nak", "syn", "etb", "can", "em", "sub", "esc",`
			`"fs", "gs", "rs", "us", "sp",`
			`};`
			`const char *escdict[] = {`
			`['\0'] = "\\0", ['\a'] = "\\a",`
			`['\b'] = "\\b", ['\t'] = "\\t",`
			`['\n'] = "\\n", ['\v'] = "\\v",`
			`['\f'] = "\\f", ['\r'] = "\\r",`
			`};`
			`const char *fmtdict[] = {`
			`['d'] = "%4hhd ", ['o'] = "%03hho ",`
			`['u'] = "%3hhu ", ['x'] = "%02hhx ",`
			`};`

Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`switch (type) {`
			`case 'a':`
			`c &= ~128; /* clear high bit as required by standard */`
			`if (c < LEN(namedict) \|\| c == 127) {`
			`fprintf(f, "%3s ", (c == 127) ? "del" : namedict[c]);`
			`} else {`
			`fprintf(f, "%3c ", c);`
			`}`
			`break;`
			`case 'c':`
Match \v as escaped character Thanks Greg for noticing this! 2015-10-02 09:43:17 -04:00			`if (strchr("\a\b\t\n\v\f\r\0", c)) {`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`fprintf(f, "%3s ", escdict[c]);`
			`} else {`
			`fprintf(f, "%3c ", c);`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`}`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`break;`
			`default:`
			`fprintf(f, fmtdict[type], c);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`}`
			`}`

			`static void`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`od(FILE in, char in_name, FILE out, char out_name)`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`{`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`off_t addr;`
			`size_t i, chunklen;`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`unsigned char buf[BUFSIZ];`

Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`for (addr = 0; (chunklen = fread(buf, 1, BUFSIZ, in)); ) {`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`for (i = 0; i < chunklen && (maxbytes == -1 \|\|`
			`(addr - skip) < maxbytes); ++i, ++addr) {`
			`if (addr - skip < 0)`
			`continue;`
			`if (((addr - skip) % bytes_per_line) == 0) {`
			`if (addr - skip)`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`fputc('\n', out);`
			`printaddress(out, addr);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`}`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`printchar(out, buf[i]);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`}`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`if (feof(in) \|\| ferror(in) \|\| ferror(out))`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`break;`
			`}`
Fix last offset output using skip in od(1) Yeah, if the skipping is longer than the file itself, we need to take the skip value, not the address. Also, only print the last newline when we've actually printed at least 1 address. 2015-09-30 14:06:22 -04:00			`if (addr - skip > 0)`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`fputc('\n', out);`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`if (radix != 'n') {`
Fix last offset output using skip in od(1) Yeah, if the skipping is longer than the file itself, we need to take the skip value, not the address. Also, only print the last newline when we've actually printed at least 1 address. 2015-09-30 14:06:22 -04:00			`printaddress(out, MAX(addr, skip));`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`fputc('\n', out);`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`}`
Some small changes for od(1) 1) Move usage() down above main(). 2) Consistently use printaddress() across the code. 3) Use off_t instead of size_t for file offsets. 2015-09-29 18:08:58 -04:00			`}`

			`static void`
			`usage(void)`
			`{`
Implement od(1) v-flag If this flag is not given, od(1) automatically replaces duplicate adjacent lines with an '' for each reoccurence. If this flag is set, thus, no such filtering occurs. In this case this would mean having to somehow keep the last printed line in some backbuffer, building the next line and then doing the necessary comparisons. This basically means that we duplicate the functionality provided with uniq(1). So instead of $ od -t a > dump you'd rather do $ od -t a \| uniq -f 1 -c > dump Skipping the first field is necessary, as the addresses obviously differ. Now, I was thinking hard why this flag even exists. If POSIX mandated to add the address before the asterisk, so we know the offset of duplicate occurrences, this would make sense. However, this is not the case. Using uniq(1) also gives nicer output: ~ $ echo "111111111111111111111111111111111111111111111111" \| od -t a -v \| uniq -f 1 -c 3 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0000060 nl 1 0000061 in comparison to $ echo "111111111111111111111111111111111111111111111111" \| od -t a 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0000060 nl 0000061 Before working on od(1), I didn't even know it would filter out duplicate adjacent lines like that. This is also a matter of predictability. Concluding, the v-flag is implicitly set and users urged to just use the existing tools provided by the system. I don't think we would break scripts either. Firstly, it's rather unlikely to have duplicate lines exactly matching the line-length of od(1). Secondly, even if a script did that specifically, in the worst case there would be a counting error or something. Given od(1) is mostly used interactively, we can safely assume this feature is for the benefit of the users. Ditch this legacy POSIX crap! Please enter the commit message for your changes. Lines starting 2015-09-30 06:54:24 -04:00			`eprintf("usage: %s [-A d\|o\|x\|n] [-t a\|c\|d\|o\|u\|x] [-v] [file ...]\n", argv0);`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`}`

			`int`
			`main(int argc, char *argv[])`
			`{`
			`FILE *fp;`
			`int ret = 0;`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`char *s;`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00
			`ARGBEGIN {`
			`case 'A':`
			`s = EARGF(usage());`
Check explicit length in od(1) If you pass an empty string, the null-char will always match with strchr()! 2015-09-29 17:14:59 -04:00			`if (strlen(s) != 1 \|\| !strchr("doxn", s[0]))`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`usage();`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`radix = s[0];`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`break;`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`case 'j':`
Check parseoffset return value in od(1) Yeah, kinda embarrassing. Forgot about that. 2015-09-30 13:13:32 -04:00			`if ((skip = parseoffset(EARGF(usage()))) < 0)`
			`return 1;`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`break;`
			`case 'N':`
Check parseoffset return value in od(1) Yeah, kinda embarrassing. Forgot about that. 2015-09-30 13:13:32 -04:00			`if ((maxbytes = parseoffset(EARGF(usage()))) < 0)`
			`return 1;`
Add -j and -N to od(1) and update README With parseoffset(), it's rather trivial to implement POSIX' rather obscure commandments. The -j and -N-flags should be ready to go! 2015-09-29 19:50:56 -04:00			`break;`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`case 't':`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`s = EARGF(usage());`
Check explicit length in od(1) If you pass an empty string, the null-char will always match with strchr()! 2015-09-29 17:14:59 -04:00			`if (strlen(s) != 1 \|\| !strchr("acdoux", s[0]))`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`usage();`
Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`type = s[0];`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`break;`
Implement od(1) v-flag If this flag is not given, od(1) automatically replaces duplicate adjacent lines with an '' for each reoccurence. If this flag is set, thus, no such filtering occurs. In this case this would mean having to somehow keep the last printed line in some backbuffer, building the next line and then doing the necessary comparisons. This basically means that we duplicate the functionality provided with uniq(1). So instead of $ od -t a > dump you'd rather do $ od -t a \| uniq -f 1 -c > dump Skipping the first field is necessary, as the addresses obviously differ. Now, I was thinking hard why this flag even exists. If POSIX mandated to add the address before the asterisk, so we know the offset of duplicate occurrences, this would make sense. However, this is not the case. Using uniq(1) also gives nicer output: ~ $ echo "111111111111111111111111111111111111111111111111" \| od -t a -v \| uniq -f 1 -c 3 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0000060 nl 1 0000061 in comparison to $ echo "111111111111111111111111111111111111111111111111" \| od -t a 0000000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0000060 nl 0000061 Before working on od(1), I didn't even know it would filter out duplicate adjacent lines like that. This is also a matter of predictability. Concluding, the v-flag is implicitly set and users urged to just use the existing tools provided by the system. I don't think we would break scripts either. Firstly, it's rather unlikely to have duplicate lines exactly matching the line-length of od(1). Secondly, even if a script did that specifically, in the worst case there would be a counting error or something. Given od(1) is mostly used interactively, we can safely assume this feature is for the benefit of the users. Ditch this legacy POSIX crap! Please enter the commit message for your changes. Lines starting 2015-09-30 06:54:24 -04:00			`case 'v':`
			`/* Always set. Use "uniq -f 1 -c" to handle duplicate lines. */`
			`break;`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00			`default:`
			`usage();`
			`} ARGEND;`

			`if (!argc) {`
			`od(stdin, "<stdin>", stdout, "<stdout>");`
			`} else {`
			`for (; *argv; argc--, argv++) {`
			`if (!strcmp(*argv, "-")) {`
			`*argv = "<stdin>";`
			`fp = stdin;`
			`} else if (!(fp = fopen(*argv, "r"))) {`
			`weprintf("fopen %s:", *argv);`
			`ret = 1;`
			`continue;`
			`}`
			`od(fp, *argv, stdout, "<stdout>");`
			`if (fp != stdin && fshut(fp, *argv))`
			`ret = 1;`
			`}`
			`}`

Refactor parts of od(1) It was possible to make some sections of the code shorter. Also fix a bug where the last printed address was always in hex rather than depending on the radix chosen. 2015-09-29 17:09:59 -04:00			`ret \|= fshut(stdin, "<stdin>") \| fshut(stdout, "<stdout>") \|`
			`fshut(stderr, "<stderr>");`
Created od, with improvements suggested by FRIGN 2015-09-28 21:02:17 -04:00
			`return ret;`
			`}`