sort: reuse buffer in columns()

speeds up sorting for huge input aswell.
This commit is contained in:
Hiltjo Posthuma 2015-05-05 13:41:43 +02:00 committed by sin
parent 0b85532616
commit 72250324b1
1 changed files with 20 additions and 14 deletions

34
sort.c
View File

@ -38,10 +38,12 @@ static int parse_keydef(struct keydef *, char *, int);
static char *skipblank(char *); static char *skipblank(char *);
static char *skipnonblank(char *); static char *skipnonblank(char *);
static char *skipcolumn(char *, char *, int); static char *skipcolumn(char *, char *, int);
static char *columns(char *, const struct keydef *); static size_t columns(char *, const struct keydef *, char **, size_t *);
static int Cflag = 0, cflag = 0, uflag = 0; static int Cflag = 0, cflag = 0, uflag = 0;
static char *fieldsep = NULL; static char *fieldsep = NULL;
static char *col1, *col2;
static size_t col1siz, col2siz;
static void static void
addkeydef(char *def, int flags) addkeydef(char *def, int flags)
@ -81,32 +83,28 @@ check(FILE *fp)
static int static int
linecmp(const char **a, const char **b) linecmp(const char **a, const char **b)
{ {
char *s1, *s2;
int res = 0; int res = 0;
long double x, y; long double x, y;
struct kdlist *node; struct kdlist *node;
for (node = head; node && res == 0; node = node->next) { for (node = head; node && res == 0; node = node->next) {
s1 = columns((char *)*a, &node->keydef); columns((char *)*a, &node->keydef, &col1, &col1siz);
s2 = columns((char *)*b, &node->keydef); columns((char *)*b, &node->keydef, &col2, &col2siz);
/* if -u is given, don't use default key definition /* if -u is given, don't use default key definition
* unless it is the only one */ * unless it is the only one */
if (uflag && node == tail && head != tail) { if (uflag && node == tail && head != tail) {
res = 0; res = 0;
} else if (node->keydef.flags & MOD_N) { } else if (node->keydef.flags & MOD_N) {
x = strtold(s1, NULL); x = strtold(col1, NULL);
y = strtold(s2, NULL); y = strtold(col2, NULL);
res = x < y ? -1 : x > y; res = x < y ? -1 : x > y;
} else { } else {
res = strcmp(s1, s2); res = strcmp(col1, col2);
} }
if (node->keydef.flags & MOD_R) if (node->keydef.flags & MOD_R)
res = -res; res = -res;
free(s1);
free(s2);
} }
return res; return res;
@ -202,10 +200,11 @@ skipcolumn(char *s, char *eol, int next_col)
return s; return s;
} }
static char * static size_t
columns(char *line, const struct keydef *kd) columns(char *line, const struct keydef *kd, char **col, size_t *colsiz)
{ {
char *start, *end, *eol = strchr(line, '\n'); char *start, *end, *eol = strchr(line, '\n');
size_t len;
int i; int i;
for (i = 1, start = line; i < kd->start_column; i++) for (i = 1, start = line; i < kd->start_column; i++)
@ -226,7 +225,14 @@ columns(char *line, const struct keydef *kd)
} else { } else {
end = eol; end = eol;
} }
return enstrndup(2, start, start > end ? 0 : end - start); len = start > end ? 0 : end - start;
if (!*col || *colsiz < len)
*col = erealloc(*col, len + 1);
memcpy(*col, start, len);
(*col)[len] = '\0';
if (*colsiz < len)
*colsiz = len;
return len;
} }
static void static void
@ -258,7 +264,7 @@ main(int argc, char *argv[])
addkeydef(EARGF(usage()), global_flags); addkeydef(EARGF(usage()), global_flags);
break; break;
case 'm': case 'm':
/* more or less for free, but for perfomance-reasons, /* more or less for free, but for performance-reasons,
* we should keep this flag in mind and maybe some later * we should keep this flag in mind and maybe some later
* day implement it properly so we don't run out of memory * day implement it properly so we don't run out of memory
* while merging large sorted files. * while merging large sorted files.