mirror of
https://git.zap.org.au/git/trader.git
synced 2025-02-02 15:08:13 -05:00
Add the functions xwcrtomb() and xmbstowcs()
The xwcrtomb() and xmbstowcs() functions replace illegal characters with EILSEQ_REPL ('?'). Adjust other functions, particularly mkchstr() and friends, to suit.
This commit is contained in:
parent
336611559b
commit
7b544bbd35
114
src/intf.c
114
src/intf.c
@ -1069,68 +1069,51 @@ int mkchstr_parse (const wchar_t *restrict format,
|
|||||||
void mkchstr_conv (chtype *restrict chbuf, int chbufsize,
|
void mkchstr_conv (chtype *restrict chbuf, int chbufsize,
|
||||||
wchar_t *restrict wcbuf, chtype *restrict attrbuf)
|
wchar_t *restrict wcbuf, chtype *restrict attrbuf)
|
||||||
{
|
{
|
||||||
char *convbuf = xmalloc(chbufsize);
|
char convbuf[MB_LEN_MAX + 1];
|
||||||
mbstate_t mbstate;
|
char endbuf[MB_LEN_MAX];
|
||||||
wchar_t *wp;
|
mbstate_t mbstate, mbcopy;
|
||||||
|
size_t endsize, n;
|
||||||
char *p;
|
char *p;
|
||||||
bool done;
|
bool done;
|
||||||
size_t n;
|
|
||||||
|
|
||||||
|
|
||||||
/* Perform a preliminary conversion to weed out any problems with
|
|
||||||
EILSEQ and insufficient buffer space. */
|
|
||||||
while (true) {
|
|
||||||
memset(&mbstate, 0, sizeof(mbstate));
|
|
||||||
wp = wcbuf;
|
|
||||||
if (wcsrtombs(convbuf, (const wchar_t **) &wp, chbufsize, &mbstate)
|
|
||||||
== (size_t) -1) {
|
|
||||||
if (errno == EILSEQ) {
|
|
||||||
/* Replace problematic wide characters with a known-good
|
|
||||||
(ASCII) one. This is better than terminating! */
|
|
||||||
*wp = EILSEQ_REPL;
|
|
||||||
} else {
|
|
||||||
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
|
|
||||||
}
|
|
||||||
} else if (wp != NULL) {
|
|
||||||
// convbuf is too small: truncate wcbuf if possible
|
|
||||||
if (wp == wcbuf) {
|
|
||||||
errno = E2BIG;
|
|
||||||
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
|
|
||||||
} else {
|
|
||||||
*(wp - 1) = '\0';
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// wcbuf CAN fit into convbuf when converted
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert for real, combining each multibyte character with attrbuf
|
|
||||||
memset(&mbstate, 0, sizeof(mbstate));
|
memset(&mbstate, 0, sizeof(mbstate));
|
||||||
done = false;
|
done = false;
|
||||||
while (! done) {
|
while (! done) {
|
||||||
// Yes, we want to convert a wide NUL, too!
|
// Make sure we always have enough space for ending shift sequence
|
||||||
if ((n = wcrtomb(convbuf, *wcbuf, &mbstate)) == (size_t) -1) {
|
memcpy(&mbcopy, &mbstate, sizeof(mbstate));
|
||||||
errno_exit(_("mkchstr_conv: `%ls'"), wcbuf);
|
endsize = wcrtomb(endbuf, '\0', &mbcopy);
|
||||||
|
if (endsize == (size_t) -1) {
|
||||||
|
errno_exit(_("mkchstr_conv: NUL"));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (p = convbuf; n > 0; n--, p++, chbuf++) {
|
// Yes, we want to convert a wide NUL, too!
|
||||||
if (*p == '\0' || *p == '\n') {
|
n = xwcrtomb(convbuf, *wcbuf, &mbstate);
|
||||||
/* This code assumes '\n' can never appear in a multibyte
|
|
||||||
string except as a control character---which is true
|
if (chbufsize > endsize + n) {
|
||||||
of all multibyte encodings (I believe!) */
|
for (p = convbuf; n > 0; n--, p++, chbuf++, chbufsize--) {
|
||||||
*chbuf = (unsigned char) *p;
|
if (*p == '\0' || *p == '\n') {
|
||||||
} else {
|
/* This code assumes '\n' can never appear in a
|
||||||
*chbuf = (unsigned char) *p | *attrbuf;
|
multibyte string except as a control character---
|
||||||
|
which is true of all multibyte encodings (I
|
||||||
|
believe!) */
|
||||||
|
*chbuf = (unsigned char) *p;
|
||||||
|
} else {
|
||||||
|
*chbuf = (unsigned char) *p | *attrbuf;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Not enough space for *wcbuf, so terminate chbuf early
|
||||||
|
for (p = endbuf; endsize > 0; endsize--, p++, chbuf++) {
|
||||||
|
*chbuf = (unsigned char) *p;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
done = (*wcbuf == '\0');
|
done = (*wcbuf == '\0');
|
||||||
wcbuf++;
|
wcbuf++;
|
||||||
attrbuf++;
|
attrbuf++;
|
||||||
}
|
}
|
||||||
|
|
||||||
free(convbuf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1147,7 +1130,6 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
struct convspec *spec;
|
struct convspec *spec;
|
||||||
const wchar_t *wcformat;
|
const wchar_t *wcformat;
|
||||||
wchar_t *orig_wcformat;
|
wchar_t *orig_wcformat;
|
||||||
mbstate_t mbstate;
|
|
||||||
|
|
||||||
wchar_t *outbuf, *orig_outbuf;
|
wchar_t *outbuf, *orig_outbuf;
|
||||||
chtype *attrbuf, *orig_attrbuf;
|
chtype *attrbuf, *orig_attrbuf;
|
||||||
@ -1176,16 +1158,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
fmtbuf = xmalloc(BUFSIZE * sizeof(wchar_t));
|
fmtbuf = xmalloc(BUFSIZE * sizeof(wchar_t));
|
||||||
|
|
||||||
// Convert format to a wide-character string
|
// Convert format to a wide-character string
|
||||||
{
|
xmbstowcs(orig_wcformat, format, BUFSIZE);
|
||||||
memset(&mbstate, 0, sizeof(mbstate));
|
|
||||||
const char *p = format;
|
|
||||||
if (mbsrtowcs(orig_wcformat, &p, BUFSIZE, &mbstate) == (size_t) -1) {
|
|
||||||
goto error;
|
|
||||||
} else if (p != NULL) {
|
|
||||||
errno = E2BIG;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mkchstr_parse(wcformat, format_arg, format_spec, args) < 0) {
|
if (mkchstr_parse(wcformat, format_arg, format_spec, args) < 0) {
|
||||||
goto error;
|
goto error;
|
||||||
@ -1269,8 +1242,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (wc == '\0' || wc == WEOF) {
|
if (wc == '\0' || wc == WEOF) {
|
||||||
errno = EILSEQ;
|
wc = EILSEQ_REPL;
|
||||||
goto error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fmtbuf[0] = wc;
|
fmtbuf[0] = wc;
|
||||||
@ -1319,7 +1291,6 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
/* strfmon() is not available in a wide-char
|
/* strfmon() is not available in a wide-char
|
||||||
version, so we need a multibyte char buffer */
|
version, so we need a multibyte char buffer */
|
||||||
char *buf = xmalloc(BUFSIZE);
|
char *buf = xmalloc(BUFSIZE);
|
||||||
const char *p = buf;
|
|
||||||
|
|
||||||
if (l_strfmon(buf, BUFSIZE, spec->flag_nosym ? "%!n" : "%n",
|
if (l_strfmon(buf, BUFSIZE, spec->flag_nosym ? "%!n" : "%n",
|
||||||
format_arg[spec->arg_num].a.a_double) < 0) {
|
format_arg[spec->arg_num].a.a_double) < 0) {
|
||||||
@ -1329,19 +1300,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(&mbstate, 0, sizeof(mbstate));
|
xmbstowcs(fmtbuf, buf, BUFSIZE);
|
||||||
if (mbsrtowcs(fmtbuf, &p, BUFSIZE, &mbstate)
|
|
||||||
== (size_t) -1) {
|
|
||||||
saved_errno = errno;
|
|
||||||
free(buf);
|
|
||||||
errno = saved_errno;
|
|
||||||
goto error;
|
|
||||||
} else if (p != NULL) {
|
|
||||||
free(buf);
|
|
||||||
errno = E2BIG;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1357,14 +1316,7 @@ int vmkchstr (chtype *restrict chbuf, int chbufsize, chtype attr_norm,
|
|||||||
if (p == NULL) {
|
if (p == NULL) {
|
||||||
str = NULL;
|
str = NULL;
|
||||||
} else {
|
} else {
|
||||||
memset(&mbstate, 0, sizeof(mbstate));
|
xmbstowcs(fmtbuf, p, BUFSIZE);
|
||||||
if (mbsrtowcs(fmtbuf, &p, BUFSIZE, &mbstate)
|
|
||||||
== (size_t) -1) {
|
|
||||||
goto error;
|
|
||||||
} else if (p != NULL) {
|
|
||||||
errno = E2BIG;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
str = fmtbuf;
|
str = fmtbuf;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,6 +55,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
#include <limits.h>
|
||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
76
src/utils.c
76
src/utils.c
@ -507,5 +507,81 @@ wchar_t *xwcsdup (const wchar_t *str)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************************************************/
|
||||||
|
// xmbstowcs: Convert a multibyte string to a wide-character string
|
||||||
|
|
||||||
|
size_t xmbstowcs (wchar_t *restrict dest, const char *restrict src, size_t len)
|
||||||
|
{
|
||||||
|
assert(dest != NULL);
|
||||||
|
assert(len > 0);
|
||||||
|
|
||||||
|
char *s = xstrdup(src);
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
mbstate_t mbstate;
|
||||||
|
char *p = s;
|
||||||
|
|
||||||
|
memset(&mbstate, 0, sizeof(mbstate));
|
||||||
|
if ((n = mbsrtowcs(dest, (const char **) &p, len, &mbstate))
|
||||||
|
== (size_t) -1) {
|
||||||
|
if (errno == EILSEQ) {
|
||||||
|
// Illegal sequence detected: replace it and try again
|
||||||
|
*p = EILSEQ_REPL;
|
||||||
|
} else {
|
||||||
|
errno_exit(_("xmbstowcs: `%s'"), src);
|
||||||
|
}
|
||||||
|
} else if (p != NULL) {
|
||||||
|
// Multibyte string was too long: truncate dest
|
||||||
|
dest[len - 1] = '\0';
|
||||||
|
n--;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(s);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************************************************/
|
||||||
|
// xwcrtomb: Convert a wide character to a multibyte sequence
|
||||||
|
|
||||||
|
size_t xwcrtomb (char *restrict dest, wchar_t wc, mbstate_t *restrict mbstate)
|
||||||
|
{
|
||||||
|
mbstate_t mbcopy;
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
|
||||||
|
assert(dest != NULL);
|
||||||
|
assert(mbstate != NULL);
|
||||||
|
|
||||||
|
memcpy(&mbcopy, mbstate, sizeof(mbcopy));
|
||||||
|
|
||||||
|
if ((n = wcrtomb(dest, wc, &mbcopy)) == (size_t) -1) {
|
||||||
|
if (errno == EILSEQ) {
|
||||||
|
/* wc cannot be represented in current locale.
|
||||||
|
|
||||||
|
Note that the shift state in mbcopy is now undefined.
|
||||||
|
Hence, restore the original, try to store an ending shift
|
||||||
|
sequence, then EILSEQ_REPL. */
|
||||||
|
memcpy(&mbcopy, mbstate, sizeof(mbcopy));
|
||||||
|
if ((n = wcrtomb(dest, '\0', &mbcopy)) == (size_t) -1) {
|
||||||
|
errno_exit(_("xwcrtomb: NUL"));
|
||||||
|
}
|
||||||
|
dest[n] = EILSEQ_REPL;
|
||||||
|
dest[n++] = '\0';
|
||||||
|
} else {
|
||||||
|
errno_exit(_("xwcrtomb: `%lc'"), wc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(mbstate, &mbcopy, sizeof(mbcopy));
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/***********************************************************************/
|
/***********************************************************************/
|
||||||
// End of file
|
// End of file
|
||||||
|
33
src/utils.h
33
src/utils.h
@ -343,4 +343,37 @@ extern char *xstrdup (const char *str);
|
|||||||
extern wchar_t *xwcsdup (const wchar_t *str);
|
extern wchar_t *xwcsdup (const wchar_t *str);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: xmbstowcs - Convert a multibyte string to a wide-character string
|
||||||
|
Parameters: dest - Location of wide-string buffer
|
||||||
|
src - String to convert
|
||||||
|
len - Size of dest, in multiples of wchar_t
|
||||||
|
Returns: size_t - Number of characters placed in dest (excluding NUL)
|
||||||
|
|
||||||
|
This wrapper function converts a multibyte string to a wide-character
|
||||||
|
one by calling mbsrtowcs() repeatedly until the whole string is
|
||||||
|
converted. If any illegal sequences are present, they are converted to
|
||||||
|
the EILSEQ_REPL character. If the destination buffer is too small, the
|
||||||
|
string is truncated.
|
||||||
|
*/
|
||||||
|
extern size_t xmbstowcs (wchar_t *restrict dest, const char *restrict src,
|
||||||
|
size_t len);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: xwcrtomb - Convert a wide character to a multibyte sequence
|
||||||
|
Parameters: dest - Location of multibyte buffer (size >= MB_CUR_MAX + 1)
|
||||||
|
wc - Character to convert
|
||||||
|
mbstate - Pointer to current multibyte shift state
|
||||||
|
Returns: size_t - Number of characters placed in dest
|
||||||
|
|
||||||
|
This wrapper function converts the wide character in wc (which may be
|
||||||
|
NUL) by calling wcrtomb(). If wc cannot be represented in the current
|
||||||
|
locale, EILSEQ_REPL is used instead (with any characters needed to move
|
||||||
|
to an initial shift state prior to EILSEQ_REPL).
|
||||||
|
*/
|
||||||
|
extern size_t xwcrtomb (char *restrict dest, wchar_t wc,
|
||||||
|
mbstate_t *restrict mbstate);
|
||||||
|
|
||||||
|
|
||||||
#endif /* included_UTILS_H */
|
#endif /* included_UTILS_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user