From 4873021e373542bec71187410b1bd9fff6ef2ca5 Mon Sep 17 00:00:00 2001 From: Renaud Fivet Date: Tue, 16 May 2017 12:13:12 +0800 Subject: [PATCH] &left and &mid handling of UTF-8 encoded characters. --- eval.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- utf8.c | 8 +++----- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/eval.c b/eval.c index cc67543..379bf0c 100644 --- a/eval.c +++ b/eval.c @@ -412,8 +412,22 @@ static char *gtfun( char *fname) { retstr = result ; } break ; - case UFLEFT | DYNAMIC: - sz = atoi( arg2) ; + case UFLEFT | DYNAMIC: { + int sz1, i ; + + sz1 = strlen( arg1) ; + sz = 0 ; + for( i = atoi( arg2) ; i > 0 ; i -= 1) { + unicode_t c ; + int bytc ; + + bytc = utf8_to_unicode( arg1, sz, sz1, &c) ; + if( bytc == 0) + break ; + else + sz += bytc ; + } + if( sz >= ressize) { free( result) ; result = malloc( sz + 1) ; @@ -423,6 +437,7 @@ static char *gtfun( char *fname) { strncpy( result, arg1, sz) ; result[ sz] = 0 ; retstr = result ; + } break ; case UFRIGHT | DYNAMIC: sz = atoi( arg2) ; @@ -434,17 +449,40 @@ static char *gtfun( char *fname) { retstr = strcpy( result, &arg1[ strlen( arg1) - sz]) ; break ; - case UFMID | TRINAMIC: - sz = atoi( arg3) ; + case UFMID | TRINAMIC: { + int sz1, start, i, bytc ; + unicode_t c ; + + sz1 = strlen( arg1) ; + start = 0 ; + for( i = atoi( arg2) - 1 ; i > 0 ; i -= 1) { + bytc = utf8_to_unicode( arg1, start, sz1, &c) ; + if( bytc == 0) + break ; + else + start += bytc ; + } + + sz = start ; + for( i = atoi( arg3) ; i > 0 ; i -= 1) { + bytc = utf8_to_unicode( arg1, sz, sz1, &c) ; + if( bytc == 0) + break ; + else + sz += bytc ; + } + + sz -= start ; if( sz >= ressize) { free( result) ; result = malloc( sz + 1) ; ressize = sz + 1 ; } - strncpy( result, &arg1[ atoi( arg2) - 1], sz) ; + strncpy( result, &arg1[ start], sz) ; result[ sz] = 0 ; retstr = result ; + } break ; case UFNOT | MONAMIC: retstr = ltos( stol( arg1) == FALSE) ; diff --git a/utf8.c b/utf8.c index 175d3d2..511e4d7 100644 --- a/utf8.c +++ b/utf8.c @@ -17,17 +17,15 @@ * NOTE 2! This does *not* verify things like minimality. So overlong forms * are happily accepted and decoded, as are the various "invalid values". */ -unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res) -{ +unsigned utf8_to_unicode( char *line, unsigned index, unsigned len, + unicode_t *res) { unicode_t value ; unsigned c ; unsigned bytes, mask, i; - assert( index < len) ; -#ifdef NDEBUG if( index >= len) return 0 ; -#endif + *res = c = line[ index] & 0xFFU ; /*