From 2c8648a9c889fc0a7c4b24367a3f81f08dedccb2 Mon Sep 17 00:00:00 2001
From: Xavier G
Date: Fri, 13 May 2016 02:10:02 +0200
Subject: [PATCH] Introduce string_policy().

---
 src/core/utf8.c | 13 +++++++++++++
 src/core/utf8.h | 14 ++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/core/utf8.c b/src/core/utf8.c
index 1daaf03f..d931ba19 100644
--- a/src/core/utf8.c
+++ b/src/core/utf8.c
@@ -25,6 +25,8 @@
 #include "utf8.h"
 #include "module.h"
 #include "wcwidth.c"
+/* recode.h provides is_utf8(), which string_policy() below calls. */
+#include "recode.h"
 
 int string_advance(char const **str, gboolean utf8)
 {
@@ -41,3 +43,14 @@ int string_advance(char const **str, gboolean utf8)
 		return 1;
 	}
 }
+
+int string_policy(const char *str)
+{
+	if (is_utf8()) {
+		if (!str || g_utf8_validate(str, -1, NULL)) {
+			/* str is NULL (no string given) or valid UTF-8: treat as UTF-8. */
+			return TREAT_STRING_AS_UTF8;
+		}
+	}
+	return TREAT_STRING_AS_BYTES; /* is_utf8() is false, or str is not valid UTF-8 */
+}
diff --git a/src/core/utf8.h b/src/core/utf8.h
index 50ee0886..fa11b737 100644
--- a/src/core/utf8.h
+++ b/src/core/utf8.h
@@ -19,6 +19,20 @@ int mk_wcwidth(unichar c);
  */
 int string_advance(char const **str, gboolean utf8);
 
+/* Possible return values of string_policy().
+ * TREAT_STRING_AS_BYTES: the string is to be handled byte by byte, using
+ * strncpy, strnlen, etc.
+ * TREAT_STRING_AS_UTF8: the string is to be handled using g_utf8_* functions.
+ */
+#define TREAT_STRING_AS_BYTES 0
+#define TREAT_STRING_AS_UTF8 1
+
+/* Return how the str string ought to be treated: TREAT_STRING_AS_UTF8 if the
+ * terminal handles UTF-8 (per is_utf8()) and str is either NULL or accepted by
+ * g_utf8_validate(); TREAT_STRING_AS_BYTES in every other case.
+ */
+int string_policy(const char *str);
+
 #define unichar_isprint(c) (((c) & ~0x80) >= 32)
 #define is_utf8_leading(c) (((c) & 0xc0) != 0x80)
 