1
0
mirror of https://git.zap.org.au/git/trader.git synced 2025-01-03 14:57:41 -05:00

Add the functions xwcrtomb() and xmbstowcs()

The xwcrtomb() and xmbstowcs() functions replace illegal characters with
EILSEQ_REPL ('?').  Adjust other functions, particularly mkchstr() and
friends, to suit.
This commit is contained in:
John Zaitseff 2011-08-20 13:03:40 +10:00
parent 336611559b
commit 7b544bbd35
4 changed files with 143 additions and 81 deletions

View File

@ -1069,68 +1069,51 @@ int mkchstr_parse (const wchar_t *restrict format,
void mkchstr_conv (chtype *restrict chbuf, int chbufsize,
wchar_t *restrict wcbuf, chtype *restrict attrbuf)
{
char *convbuf = xmalloc(chbufsize);
mbstate_t mbstate;
wchar_t *wp;
char convbuf[MB_LEN_MAX + 1];
char endbuf[MB_LEN_MAX];
mbstate_t mbstate, mbcopy;
size_t endsize, n;
char *p;
bool done;
size_t n;
/* Perform a preliminary conversion to weed out any problems with
EILSEQ and insufficient buffer space. */
while (true) {
memset(&mbstate, 0, sizeof(mbstate));
wp = wcbuf;
if (wcsrtombs(convbuf, (const wchar_t **) &wp, chbufsize, &mbstate)
== (size_t) -1) {
if (errno == EILSEQ) {
/* Replace problematic wide characters with a known-good
(ASCII) one. This is better than terminating! */
*wp = EILSEQ_REPL;
} else {
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
}
} else if (wp != NULL) {
// convbuf is too small: truncate wcbuf if possible
if (wp == wcbuf) {
errno = E2BIG;
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
} else {
*(wp - 1) = '\0';
}
} else {
// wcbuf CAN fit into convbuf when converted
break;
}
}
// Convert for real, combining each multibyte character with attrbuf
memset(&mbstate, 0, sizeof(mbstate));
done = false;
while (! done) {
// Yes, we want to convert a wide NUL, too!
if ((n = wcrtomb(convbuf, *wcbuf, &mbstate)) == (size_t) -1) {
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
// Make sure we always have enough space for ending shift sequence
memcpy(&mbcopy, &mbstate, sizeof(mbstate));
endsize = wcrtomb(endbuf, '\0', &mbcopy);
if (endsize == (size_t) -1) {
errno_exit(_("mkchstr_conv: NUL"));
}
for (p = convbuf; n > 0; n--, p++, chbuf++) {
if (*p == '\0' || *p == '\n') {
/* This code assumes '\n' can never appear in a multibyte
string except as a control character---which is true
of all multibyte encodings (I believe!) */
*chbuf = (unsigned char) *p;
} else {
*chbuf = (unsigned char) *p | *attrbuf;
// Yes, we want to convert a wide NUL, too!
n = xwcrtomb(convbuf, *wcbuf, &mbstate);
if (chbufsize > endsize + n) {
for (p = convbuf; n > 0; n--, p++, chbuf++, chbufsize--) {
if (*p == '\0' || *p == '\n') {
/* This code assumes '\n' can never appear in a
multibyte string except as a control character---
which is true of all multibyte encodings (I
believe!) */
*chbuf = (unsigned char) *p;
} else {
*chbuf = (unsigned char) *p | *attrbuf;
}
}
} else {
// Not enough space for *wcbuf, so terminate chbuf early
for (p = endbuf; endsize > 0; endsize--, p++, chbuf++) {
*chbuf = (unsigned char) *p;
}
break;
}
done = (*wcbuf == '\0');
wcbuf++;
attrbuf++;
}
free(convbuf);
}
@ -1147,7 +1130,6 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
struct convspec *spec;
const wchar_t *wcformat;
wchar_t *orig_wcformat;
mbstate_t mbstate;
wchar_t *outbuf, *orig_outbuf;
chtype *attrbuf, *orig_attrbuf;
@ -1176,16 +1158,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
fmtbuf = xmalloc(BUFSIZE * sizeof(wchar_t));
// Convert format to a wide-character string
{
memset(&mbstate, 0, sizeof(mbstate));
const char *p = format;
if (mbsrtowcs(orig_wcformat, &p, BUFSIZE, &mbstate) == (size_t) -1) {
goto error;
} else if (p != NULL) {
errno = E2BIG;
goto error;
}
}
xmbstowcs(orig_wcformat, format, BUFSIZE);
if (mkchstr_parse(wcformat, format_arg, format_spec, args) < 0) {
goto error;
@ -1269,8 +1242,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
}
if (wc == '\0' || wc == WEOF) {
errno = EILSEQ;
goto error;
wc = EILSEQ_REPL;
}
fmtbuf[0] = wc;
@ -1319,7 +1291,6 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
/* strfmon() is not available in a wide-char
version, so we need a multibyte char buffer */
char *buf = xmalloc(BUFSIZE);
const char *p = buf;
if (l_strfmon(buf, BUFSIZE, spec->flag_nosym ? "%!n" : "%n",
format_arg[spec->arg_num].a.a_double) < 0) {
@ -1329,19 +1300,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
goto error;
}
memset(&mbstate, 0, sizeof(mbstate));
if (mbsrtowcs(fmtbuf, &p, BUFSIZE, &mbstate)
== (size_t) -1) {
saved_errno = errno;
free(buf);
errno = saved_errno;
goto error;
} else if (p != NULL) {
free(buf);
errno = E2BIG;
goto error;
}
xmbstowcs(fmtbuf, buf, BUFSIZE);
free(buf);
}
@ -1357,14 +1316,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
if (p == NULL) {
str = NULL;
} else {
memset(&mbstate, 0, sizeof(mbstate));
if (mbsrtowcs(fmtbuf, &p, BUFSIZE, &mbstate)
== (size_t) -1) {
goto error;
} else if (p != NULL) {
errno = E2BIG;
goto error;
}
xmbstowcs(fmtbuf, p, BUFSIZE);
str = fmtbuf;
}
}

View File

@ -55,6 +55,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include <limits.h>
#include <locale.h>
#include <ctype.h>
#include <string.h>

View File

@ -507,5 +507,81 @@ wchar_t *xwcsdup (const wchar_t *str)
}
/***********************************************************************/
// xmbstowcs: Convert a multibyte string to a wide-character string

/*
  Convert the multibyte string src into dest, which has room for len wide
  characters.  Conversion is done on a private copy of src: each time
  mbsrtowcs() reports EILSEQ, the offending byte in the copy is overwritten
  with EILSEQ_REPL and the whole conversion is restarted from the
  beginning.  Any other conversion error is passed to errno_exit().

  If the converted string does not fit, the last element of dest is
  overwritten with a NUL so that dest is always terminated.

  Returns the number of wide characters stored in dest, excluding the
  terminating NUL.
*/
size_t xmbstowcs (wchar_t *restrict dest, const char *restrict src, size_t len)
{
    assert(dest != NULL);
    assert(len > 0);

    // Scratch copy of src: illegal bytes are patched in place
    char *copy = xstrdup(src);
    size_t count;

    for (;;) {
        mbstate_t state;
        char *cursor = copy;

        // Every attempt starts from the initial shift state
        memset(&state, 0, sizeof(state));
        count = mbsrtowcs(dest, (const char **) &cursor, len, &state);

        if (count != (size_t) -1) {
            if (cursor != NULL) {
                // Input not fully consumed: dest is full, so truncate it
                dest[len - 1] = '\0';
                count--;
            }
            break;
        }

        if (errno == EILSEQ) {
            // cursor was left at the illegal sequence: patch it and retry
            *cursor = EILSEQ_REPL;
        } else {
            errno_exit(_("xmbstowcs: `%s'"), src);
        }
    }

    free(copy);
    return count;
}
/***********************************************************************/
// xwcrtomb: Convert a wide character to a multibyte sequence

/*
  Convert the wide character wc (which may be NUL) to its multibyte
  representation in dest, using and updating the shift state in *mbstate.

  dest must have room for at least MB_CUR_MAX + 1 bytes.

  If wc cannot be represented in the current locale (EILSEQ), dest
  receives whatever bytes are needed to return to the initial shift
  state, followed by EILSEQ_REPL.  A NUL byte is stored after EILSEQ_REPL
  as a convenience, but is NOT included in the returned count.  Any other
  conversion error is passed to errno_exit().

  Returns the number of bytes placed in dest.
*/
size_t xwcrtomb (char *restrict dest, wchar_t wc, mbstate_t *restrict mbstate)
{
    mbstate_t mbcopy;
    size_t n;

    assert(dest != NULL);
    assert(mbstate != NULL);

    // Work on a copy so that *mbstate is only replaced by a valid state
    memcpy(&mbcopy, mbstate, sizeof(mbcopy));

    if ((n = wcrtomb(dest, wc, &mbcopy)) == (size_t) -1) {
        if (errno == EILSEQ) {
            /* wc cannot be represented in current locale.

               Note that the shift state in mbcopy is now undefined.
               Hence, restore the original, try to store an ending shift
               sequence, then EILSEQ_REPL. */
            memcpy(&mbcopy, mbstate, sizeof(mbcopy));
            if ((n = wcrtomb(dest, L'\0', &mbcopy)) == (size_t) -1) {
                errno_exit(_("xwcrtomb: NUL"));
            }

            /* wcrtomb() has stored [shift sequence] NUL and counted the
               NUL in n, so the NUL lives at dest[n - 1].  Overwrite it
               with the replacement character; n then counts the shift
               sequence plus EILSEQ_REPL.  The trailing NUL is written
               for convenience only and is not counted. */
            dest[n - 1] = EILSEQ_REPL;
            dest[n] = '\0';
        } else {
            // %lc requires a wint_t argument, not a wchar_t
            errno_exit(_("xwcrtomb: `%lc'"), (wint_t) wc);
        }
    }

    memcpy(mbstate, &mbcopy, sizeof(mbcopy));
    return n;
}
/***********************************************************************/
// End of file

View File

@ -343,4 +343,37 @@ extern char *xstrdup (const char *str);
extern wchar_t *xwcsdup (const wchar_t *str);
/*
Function: xmbstowcs - Convert a multibyte string to a wide-character string
Parameters: dest - Location of wide-string buffer (must not be NULL)
src - String to convert
len - Size of dest, in multiples of wchar_t (must be > 0)
Returns: size_t - Number of characters placed in dest (excluding NUL)
This wrapper function converts a multibyte string to a wide-character
one by calling mbsrtowcs() on a private copy of src.  If an illegal
sequence is detected, the offending byte in the copy is replaced with
the EILSEQ_REPL character and the conversion is retried from the start,
until the whole string is converted.  If the destination buffer is too
small, the string is truncated and the last element of dest is set to
NUL.  Any other conversion error is reported through errno_exit().
*/
extern size_t xmbstowcs (wchar_t *restrict dest, const char *restrict src,
size_t len);
/*
Function: xwcrtomb - Convert a wide character to a multibyte sequence
Parameters: dest - Location of multibyte buffer (size >= MB_CUR_MAX + 1
bytes; must not be NULL)
wc - Character to convert
mbstate - Pointer to current multibyte shift state (must not be
NULL; overwritten with the state after conversion)
Returns: size_t - Number of bytes placed in dest
This wrapper function converts the wide character in wc (which may be
NUL) by calling wcrtomb(). If wc cannot be represented in the current
locale, EILSEQ_REPL is used instead (with any bytes needed to move
to an initial shift state prior to EILSEQ_REPL).  Any other conversion
error is reported through errno_exit().
*/
extern size_t xwcrtomb (char *restrict dest, wchar_t wc,
mbstate_t *restrict mbstate);
#endif /* included_UTILS_H */