From bdbd2d5437ffa89f162081c3c759e4961f391e0b Mon Sep 17 00:00:00 2001
From: Renaud Fivet <renaud.fivet@gmail.com>
Date: Sun, 8 Feb 2015 14:26:07 +0800
Subject: [PATCH] Assert that Unicode code points are limited to 0-10FFFF.

---
 eval.c | 17 +++++++++++++----
 utf8.c | 10 ++++++----
 utf8.h |  6 +++---
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/eval.c b/eval.c
index cb16a3f..99a6545 100644
--- a/eval.c
+++ b/eval.c
@@ -523,10 +523,19 @@ static char *gtfun( char *fname) {
 		}
 
 		break ;
-	case UFCHR:
-		sz = unicode_to_utf8( atoi( argx), result) ;
-		result[ sz] = 0 ;
-		retstr = result ;		
+	case UFCHR: {
+			unicode_t c ;
+
+			c = atoi( argx) ;
+			if( c > 0x10FFFF)
+				retstr = errorm ;
+			else {
+				sz = unicode_to_utf8( c, result) ;
+				result[ sz] = 0 ;
+				retstr = result ;
+			}
+		}
+
 		break ;
 	case UFGTKEY:
 		result[0] = tgetc();
diff --git a/utf8.c b/utf8.c
index 4ee423f..686e988 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1,5 +1,7 @@
 #include "utf8.h"
 
+#include <assert.h>
+
 /*
  * utf8_to_unicode()
  *
@@ -84,11 +86,11 @@ static void reverse_string(char *begin, char *end)
  * possible sequence, while utf8_to_unicode() accepts both Latin1 and
  * overlong utf-8 sequences.
  */
-unsigned unicode_to_utf8(unsigned int c, char *utf8)
-{
-	int bytes = 1;
+unsigned unicode_to_utf8( unicode_t c, char *utf8) {
+	int bytes = 1 ;
 
-	*utf8 = c;
+	assert( c <= 0x10FFFF) ;
+	*utf8 = c ;
 	if (c > 0x7f) {
 		int prefix = 0x40;
 		char *p = utf8;
diff --git a/utf8.h b/utf8.h
index 6ac547e..45e9f8d 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1,10 +1,10 @@
 #ifndef UTF8_H
 #define UTF8_H
 
-typedef unsigned int unicode_t;
+typedef unsigned int unicode_t ;
 
 unsigned utf8_to_unicode( char *line, unsigned index, unsigned len,
-															unicode_t *res) ;
-unsigned unicode_to_utf8( unsigned int c, char *utf8) ;
+                                                            unicode_t *res) ;
+unsigned unicode_to_utf8( unicode_t c, char *utf8) ;
 
 #endif