From 73577f10a0076dfa361776ead28621f59772154c Mon Sep 17 00:00:00 2001 From: FRIGN Date: Wed, 11 Feb 2015 21:32:09 +0100 Subject: [PATCH] Scrap chartorunearr(), introducing utftorunestr() Interface and function as proposed by cls. The reasoning behind this function is that cls expressed his interest in keeping memory allocation out of libutf, which is a very good motive. This simplifies the function a lot and should also increase the speed a bit, but the most important factor here is that there's no malloc anywhere in libutf, making it a lot smaller and more robust with a smaller attack surface. Look at the paste(1) and tr(1) changes for an idiomatic way to allocate the right amount of space for the Rune-array. --- Makefile | 2 +- libutf/chartorunearr.c | 27 --------------------------- libutf/utftorunestr.c | 13 +++++++++++++ paste.c | 3 ++- tr.c | 3 ++- utf.h | 4 ++-- 6 files changed, 20 insertions(+), 32 deletions(-) delete mode 100644 libutf/chartorunearr.c create mode 100644 libutf/utftorunestr.c diff --git a/Makefile b/Makefile index 50db65a..ba5d96f 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ LIBUTFSRC =\ libutf/rune.c\ libutf/runetype.c\ libutf/utf.c\ - libutf/chartorunearr.c\ + libutf/utftorunestr.c\ libutf/fgetrune.c\ libutf/fputrune.c\ libutf/isalnumrune.c\ diff --git a/libutf/chartorunearr.c b/libutf/chartorunearr.c deleted file mode 100644 index 1484d14..0000000 --- a/libutf/chartorunearr.c +++ /dev/null @@ -1,27 +0,0 @@ -/* See LICENSE file for copyright and license details. 
*/ -#include -#include - -#include "../util.h" -#include "../utf.h" - -int -chartorunearr(const char *str, Rune **r) -{ - size_t len = strlen(str), rlen, roff, ret = 1, i; - Rune s; - - for (rlen = 0, roff = 0; roff < len && ret; rlen++) { - ret = charntorune(&s, str + roff, MAX(UTFmax, len - roff)); - roff += ret; - } - - *r = emalloc(rlen * sizeof(Rune) + 1); - (*r)[rlen] = 0; - - for (i = 0, roff = 0; roff < len && i < rlen; i++) { - roff += charntorune(&(*r)[i], str + roff, MAX(UTFmax, len - roff)); - } - - return rlen; -} diff --git a/libutf/utftorunestr.c b/libutf/utftorunestr.c new file mode 100644 index 0000000..005fe8a --- /dev/null +++ b/libutf/utftorunestr.c @@ -0,0 +1,13 @@ +/* See LICENSE file for copyright and license details. */ +#include "../utf.h" + +int +utftorunestr(const char *str, Rune *r) +{ + int i, n; + + for(i = 0; (n = chartorune(&r[i], str)) && r[i]; i++) + str += n; + + return i; +} diff --git a/paste.c b/paste.c index cd7eb3e..1b06433 100644 --- a/paste.c +++ b/paste.c @@ -110,7 +110,8 @@ main(int argc, char *argv[]) /* populate delimiters */ unescape(adelim); - len = chartorunearr(adelim, &delim); + delim = emalloc((utflen(adelim) + 1) * sizeof(*delim)); + len = utftorunestr(adelim, delim); /* populate file list */ dsc = emalloc(argc * sizeof(*dsc)); diff --git a/tr.c b/tr.c index fa3d412..a38153d 100644 --- a/tr.c +++ b/tr.c @@ -79,7 +79,8 @@ makeset(char *str, struct range **set, int (**check)(Rune)) /* rstr defines at most len ranges */ unescape(str); - len = chartorunearr(str, &rstr); + rstr = emalloc((utflen(str) + 1) * sizeof(*rstr)); + len = utftorunestr(str, rstr); *set = emalloc(len * sizeof(**set)); for (i = 0; i < len; i++) { diff --git a/utf.h b/utf.h index 203849a..a74be94 100644 --- a/utf.h +++ b/utf.h @@ -59,9 +59,9 @@ int isxdigitrune(Rune); Rune tolowerrune(Rune); Rune toupperrune(Rune); +int utftorunestr(const char*, Rune *); + int fgetrune(Rune *, FILE *); int efgetrune(Rune *, FILE *, const char *); int 
fputrune(const Rune *, FILE *); int efputrune(const Rune *, FILE *, const char *); - -int chartorunearr(const char*, Rune **);