From bdbd2d5437ffa89f162081c3c759e4961f391e0b Mon Sep 17 00:00:00 2001 From: Renaud Fivet Date: Sun, 8 Feb 2015 14:26:07 +0800 Subject: [PATCH] Assert that unicode are limited to 0-10FFFF. --- eval.c | 17 +++++++++++++---- utf8.c | 10 ++++++---- utf8.h | 6 +++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/eval.c b/eval.c index cb16a3f..99a6545 100644 --- a/eval.c +++ b/eval.c @@ -523,10 +523,19 @@ static char *gtfun( char *fname) { } break ; - case UFCHR: - sz = unicode_to_utf8( atoi( argx), result) ; - result[ sz] = 0 ; - retstr = result ; + case UFCHR: { + unicode_t c ; + + c = atoi( argx) ; + if( c > 0x10FFFF) + retstr = errorm ; + else { + sz = unicode_to_utf8( c, result) ; + result[ sz] = 0 ; + retstr = result ; + } + } + break ; case UFGTKEY: result[0] = tgetc(); diff --git a/utf8.c b/utf8.c index 4ee423f..686e988 100644 --- a/utf8.c +++ b/utf8.c @@ -1,5 +1,7 @@ #include "utf8.h" +#include <assert.h> + /* * utf8_to_unicode() * @@ -84,11 +86,11 @@ static void reverse_string(char *begin, char *end) * possible sequence, while utf8_to_unicode() accepts both Latin1 and * overlong utf-8 sequences. */ -unsigned unicode_to_utf8(unsigned int c, char *utf8) -{ - int bytes = 1; +unsigned unicode_to_utf8( unicode_t c, char *utf8) { + int bytes = 1 ; - *utf8 = c; + assert( c <= 0x10FFFF) ; + *utf8 = c ; if (c > 0x7f) { int prefix = 0x40; char *p = utf8; diff --git a/utf8.h b/utf8.h index 6ac547e..45e9f8d 100644 --- a/utf8.h +++ b/utf8.h @@ -1,10 +1,10 @@ #ifndef UTF8_H #define UTF8_H -typedef unsigned int unicode_t; +typedef unsigned int unicode_t ; unsigned utf8_to_unicode( char *line, unsigned index, unsigned len, - unicode_t *res) ; -unsigned unicode_to_utf8( unsigned int c, char *utf8) ; + unicode_t *res) ; +unsigned unicode_to_utf8( unicode_t c, char *utf8) ; #endif