Support NUL-containing lines in uniq(1)

This commit is contained in:
FRIGN 2016-03-10 07:02:04 +01:00 committed by sin
parent b58884312c
commit e537186ba4
2 changed files with 39 additions and 35 deletions

2
README
View File

@@ -97,7 +97,7 @@ The following tools are implemented:
0=*|o tty . 0=*|o tty .
0=*|o uname . 0=*|o uname .
0#*|o unexpand . 0#*|o unexpand .
=*|o uniq . 0=*|o uniq .
0=*|o unlink . 0=*|o unlink .
0=*|o uudecode . 0=*|o uudecode .
0=*|o uuencode . 0=*|o uuencode .

72
uniq.c
View File

@@ -4,6 +4,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "text.h"
#include "util.h" #include "util.h"
static const char *countfmt = ""; static const char *countfmt = "";
@@ -12,59 +13,60 @@ static int uflag = 0;
static int fskip = 0; static int fskip = 0;
static int sskip = 0; static int sskip = 0;
static char *prevline = NULL; static struct line prevl;
static char *prevoffset = NULL; static ssize_t prevoff = -1;
static long prevlinecount = 0; static long prevlinecount = 0;
static size_t prevlinesiz = 0;
static const char * static size_t
uniqskip(const char *l) uniqskip(struct line *l)
{ {
const char *lo = l; size_t i;
int f = fskip, s = sskip; int f = fskip, s = sskip;
for (; f; --f) { for (i = 0; i < l->len && f; --f) {
while (isblank(*lo)) while (isblank(l->data[i]))
lo++; i++;
while (*lo && !isblank(*lo)) while (i < l->len && !isblank(l->data[i]))
lo++; i++;
} }
for (; s && *lo && *lo != '\n'; --s, ++lo); for (; s && i < l->len && l->data[i] != '\n'; --s, i++)
;
return lo; return i;
} }
static void static void
uniqline(FILE *ofp, const char *l, size_t len) uniqline(FILE *ofp, struct line *l)
{ {
const char *loffset = l ? uniqskip(l) : l; size_t loff;
int linesequel = l && prevoffset && if (l) {
!strcmp(loffset, prevoffset); loff = uniqskip(l);
if (linesequel) { if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) &&
++prevlinecount; !memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) {
return; ++prevlinecount;
return;
}
} }
if (prevoffset) { if (prevoff >= 0) {
if ((prevlinecount == 1 && !dflag) || if ((prevlinecount == 1 && !dflag) ||
(prevlinecount != 1 && !uflag)) { (prevlinecount != 1 && !uflag)) {
if (*countfmt) if (*countfmt)
fprintf(ofp, countfmt, prevlinecount); fprintf(ofp, countfmt, prevlinecount);
fputs(prevline, ofp); fwrite(prevl.data, 1, prevl.len, ofp);
} }
prevoffset = NULL; prevoff = -1;
} }
if (l) { if (l) {
if (!prevline || len >= prevlinesiz) { if (!prevl.data || l->len >= prevl.len) {
prevlinesiz = len + 1; prevl.len = l->len;
prevline = erealloc(prevline, prevlinesiz); prevl.data = erealloc(prevl.data, prevl.len);
} }
memcpy(prevline, l, len); memcpy(prevl.data, l->data, prevl.len);
prevline[len] = '\0'; prevoff = loff;
prevoffset = prevline + (loffset - l);
} }
prevlinecount = 1; prevlinecount = 1;
} }
@@ -72,18 +74,20 @@ uniqline(FILE *ofp, const char *l, size_t len)
static void static void
uniq(FILE *fp, FILE *ofp) uniq(FILE *fp, FILE *ofp)
{ {
char *buf = NULL; static struct line line;
size_t size = 0; static size_t size;
ssize_t len; ssize_t len;
while ((len = getline(&buf, &size, fp)) > 0) while ((len = getline(&line.data, &size, fp)) > 0) {
uniqline(ofp, buf, (size_t)len); line.len = len;
uniqline(ofp, &line);
}
} }
static void static void
uniqfinish(FILE *ofp) uniqfinish(FILE *ofp)
{ {
uniqline(ofp, NULL, 0); uniqline(ofp, NULL);
} }
static void static void