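/*
 * Web-viewer residue captured together with this file (not part of utf8.c):
 *
 *   1
 *   0
 *   mirror of https://github.com/irssi/irssi.git synced 2024-12-04 14:46:39 -05:00
 *   irssi/src/fe-text/utf8.c
 *
 *   178 lines
 *   4.0 KiB
 *   C
 *   Raw Normal View History
 */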

/* utf8.c - Operations on UTF-8 strings.
*
* Copyright (C) 2002 Timo Sirainen
*
* Based on GLib code by
*
* Copyright (C) 1999 Tom Tromey
* Copyright (C) 2000 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "module.h"
/*
 * UTF8_COMPUTE(Char, Mask, Len)
 *
 * Classify Char as the first byte of a UTF-8 sequence.
 *
 *   Len   receives the total length of the sequence in bytes (1..6), or -1
 *         when Char matches none of the lead-byte patterns tested below
 *         (e.g. a 10xxxxxx continuation byte, or 0xfe / 0xff).
 *   Mask  receives the mask that selects the payload bits of the lead byte.
 *         It is left untouched when Len is set to -1.
 *
 * Char is evaluated more than once, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement (safe inside an unbraced if/else), and every
 * argument is parenthesized so that an expression argument such as `a | b`
 * is not re-associated by the `<` and `&` operators used here.
 */
#define UTF8_COMPUTE(Char, Mask, Len) \
	do { \
		if ((Char) < 128) { \
			(Len) = 1; \
			(Mask) = 0x7f; \
		} else if (((Char) & 0xe0) == 0xc0) { \
			(Len) = 2; \
			(Mask) = 0x1f; \
		} else if (((Char) & 0xf0) == 0xe0) { \
			(Len) = 3; \
			(Mask) = 0x0f; \
		} else if (((Char) & 0xf8) == 0xf0) { \
			(Len) = 4; \
			(Mask) = 0x07; \
		} else if (((Char) & 0xfc) == 0xf8) { \
			(Len) = 5; \
			(Mask) = 0x03; \
		} else if (((Char) & 0xfe) == 0xfc) { \
			(Len) = 6; \
			(Mask) = 0x01; \
		} else { \
			(Len) = -1; \
		} \
	} while (0)
/*
 * UTF8_GET(Result, Chars, Count, Mask, Len)
 *
 * Decode one UTF-8 sequence of Len bytes starting at Chars[0].
 *
 *   Result  receives the decoded value, or -1 as soon as one of the bytes
 *           Chars[1] .. Chars[Len-1] is not a 10xxxxxx continuation byte.
 *   Chars   indexable byte sequence; Chars[0] is the lead byte.
 *   Count   scratch loop index (an lvalue); its final value is unspecified
 *           for callers.
 *   Mask    payload mask for the lead byte (as produced by UTF8_COMPUTE).
 *   Len     number of bytes in the sequence.
 *
 * Each continuation byte contributes its low six bits. A NUL byte is not a
 * continuation byte, so decoding stops at a string terminator instead of
 * reading beyond it.
 *
 * The body is wrapped in do { } while (0): the expansion used to be two
 * statements (an assignment followed by a for loop), so a guarded use such
 * as `if (cond) UTF8_GET(...);` only guarded the assignment.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len) \
	do { \
		(Result) = (Chars)[0] & (Mask); \
		for ((Count) = 1; (Count) < (Len); ++(Count)) { \
			if (((Chars)[(Count)] & 0xc0) != 0x80) { \
				(Result) = -1; \
				break; \
			} \
			(Result) <<= 6; \
			(Result) |= ((Chars)[(Count)] & 0x3f); \
		} \
	} while (0)
/*
 * Decode the UTF-8 sequence that starts at **ptr.
 *
 *   ptr  in/out cursor into the byte string. On success it is advanced to
 *        the LAST byte of the decoded sequence (by sequence length - 1), so
 *        the caller steps one more byte to reach the next character. It is
 *        not modified on failure.
 *   len  number of bytes the caller allows to be consumed from *ptr.
 *
 * Returns the decoded value on success,
 *   (unichar) -1 if the sequence announced by the lead byte is longer than
 *                len bytes,
 *   (unichar) -2 if the lead byte is not a valid UTF-8 lead byte or one of
 *                the following bytes is not a continuation byte.
 */
unichar get_utf8_char(const unsigned char **ptr, int len)
{
	int i, result, mask, chrlen;

	mask = 0;
	UTF8_COMPUTE(**ptr, mask, chrlen);
	/* Validate the sequence length derived from the lead byte, not the
	   caller's byte budget. */
	if (chrlen == -1)
		return (unichar) -2;

	if (chrlen > len)
		return (unichar) -1;

	/* Decode exactly chrlen bytes; using the budget here would demand
	   continuation bytes that belong to whatever follows the character. */
	UTF8_GET(result, *ptr, i, mask, chrlen);
	if (result == -1)
		return (unichar) -2;

	*ptr += chrlen - 1;
	return result;
}
/*
 * Count the characters in the NUL-terminated string str.
 *
 * Each iteration hands the cursor to get_utf8_char() with a budget of six
 * bytes and then steps one byte further. Counting stops at the terminating
 * NUL or as soon as get_utf8_char() returns a value that is not greater
 * than zero.
 *
 * NOTE(review): get_utf8_char() signals errors as (unichar) -1 / -2. If
 * unichar is an unsigned type those values compare greater than zero and
 * will not end the loop - confirm against the unichar typedef.
 */
int strlen_utf8(const char *str)
{
	const unsigned char *cursor = (const unsigned char *) str;
	int count;

	for (count = 0; *cursor != '\0'; count++, cursor++) {
		if (!(get_utf8_char(&cursor, 6) > 0))
			break;
	}

	return count;
}
/*
 * Encode the single character c as UTF-8.
 *
 *   c       value to encode.
 *   outbuf  destination for the encoded bytes, or a null pointer to only
 *           compute the length. When non-null it must have room for the
 *           returned number of bytes; no terminator is written.
 *
 * Returns the number of bytes the encoding occupies (1..6).
 */
int utf16_char_to_utf8(unichar c, unsigned char *outbuf)
{
	/* Lead-byte marker bits, indexed by encoded length (slot 0 unused). */
	static const int lead_bits[] = {
		0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
	};
	int nbytes, pos;

	nbytes = c < 0x80 ? 1 :
		c < 0x800 ? 2 :
		c < 0x10000 ? 3 :
		c < 0x200000 ? 4 :
		c < 0x4000000 ? 5 : 6;

	if (!outbuf)
		return nbytes;

	/* Fill the continuation bytes back to front, six payload bits each;
	   whatever remains of c afterwards goes into the lead byte. */
	pos = nbytes;
	while (--pos > 0) {
		outbuf[pos] = (c & 0x3f) | 0x80;
		c >>= 6;
	}
	outbuf[0] = c | lead_bits[nbytes];

	return nbytes;
}
/*
 * Decode the NUL-terminated UTF-8 string str into the unichar array out.
 *
 * Decoding stops at the terminating NUL, at the first byte that is not a
 * valid lead byte, or at the first sequence with a bad continuation byte;
 * whatever was decoded up to that point is kept. The output is always
 * terminated with a zero element.
 *
 * No bounds checking is done on out: every stored element consumes at
 * least one input byte, so the caller must supply room for one element per
 * byte of str plus the terminator.
 */
void utf8_to_utf16(const char *str, unichar *out)
{
	const unsigned char *src;
	int idx, value, lead_mask, seqlen;

	for (src = (const unsigned char *) str; *src != '\0'; src += seqlen) {
		lead_mask = 0;
		UTF8_COMPUTE(*src, lead_mask, seqlen);
		if (seqlen == -1)
			break;

		UTF8_GET(value, src, idx, lead_mask, seqlen);
		if (value == -1)
			break;

		*out = value;
		out++;
	}

	*out = '\0';
}
/*
 * Encode the zero-terminated unichar string str as UTF-8 into out.
 *
 *   str  input characters, terminated by a zero element.
 *   out  destination buffer. No bounds checking is done;
 *        utf16_char_to_utf8() emits between 1 and 6 bytes per input
 *        element, and a terminating NUL byte is appended, so the caller
 *        must size the buffer accordingly.
 */
void utf16_to_utf8(const unichar *str, char *out)
{
	int len;

	while (*str != '\0') {
		/* utf16_char_to_utf8() takes unsigned char *; convert explicitly
		   instead of relying on an implicit conversion between char *
		   and unsigned char *, which are incompatible pointer types. */
		len = utf16_char_to_utf8(*str, (unsigned char *) out);
		out += len;
		str++;
	}
	*out = '\0';
}