1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-01-03 14:57:44 -05:00

Added functions for manipulating UTF-8 strings.

This commit is contained in:
Pavol Babincak 2006-03-05 00:10:33 +01:00 committed by Pavol Babincak
parent f4a430e480
commit 79d4d74a22
2 changed files with 86 additions and 0 deletions

View File

@ -252,6 +252,89 @@ strlen_utf8(unsigned char **str)
return x; return x;
} }
/* Count the number of standard terminal cells needed for displaying one
 * UTF-8 character.
 *
 * @utf8_char points to the first byte of the encoded character.
 * @end is handed to utf_8_to_unicode() as the end of the input; when it is
 * NULL the terminating NUL byte of @utf8_char is used instead.
 *
 * Returns the result of unicode_to_cell() for the decoded character, or -1
 * when @utf8_char is NULL. */
int
utf8_char2cells(unsigned char *utf8_char, unsigned char *end)
{
	unicode_val_T u;

	/* Reject a NULL string before strchr() gets a chance to
	 * dereference it. */
	if (!utf8_char)
		return -1;

	/* strchr() looking for '\0' always finds the terminator, so @end
	 * cannot be NULL past this point. */
	if (end == NULL)
		end = (unsigned char *) strchr((const char *) utf8_char, '\0');

	u = utf_8_to_unicode(&utf8_char, end);

	return unicode_to_cell(u);
}
/* Count the number of standard terminal cells needed for displaying a
 * string of UTF-8 characters.
 *
 * @string points to the first byte of the text.
 * @end marks the end of the text; when it is NULL the terminating NUL byte
 * of @string is used instead.
 *
 * A trailing character whose encoded length (as reported by utf8charlen())
 * would run past @end is not counted.
 *
 * Returns the sum of the cell widths, or -1 when @string is NULL or when
 * utf8_char2cells() reports an error for one of the characters. */
int
utf8_ptr2cells(unsigned char *string, unsigned char *end)
{
	int cells = 0;

	/* Reject a NULL string before strchr() gets a chance to
	 * dereference it. */
	if (!string)
		return -1;

	if (end == NULL)
		end = (unsigned char *) strchr((const char *) string, '\0');

	/* Stop as soon as @end is reached so that the byte at @end itself
	 * is never inspected. */
	while (string < end) {
		int charlen = utf8charlen(string);
		int cell;

		/* Incomplete character at the end of the range. */
		if (string + charlen > end)
			break;

		cell = utf8_char2cells(string, end);
		if (cell < 0)
			return -1;

		cells += cell;
		string += charlen;
	}

	return cells;
}
/*
 * Count the number of bytes from the beginning of the string that can be
 * displayed within the specified number of terminal cells.
 *
 * @string points to the first byte of the text.
 * @max_cells is the number of cells available; it must not be negative.
 * @end marks the end of the text; when it is NULL the terminating NUL byte
 * of @string is used instead.
 *
 * Returns the byte count (never more than @end - @string), or -1 when
 * @string is NULL or when utf8_char2cells() reports an error.
 */
int
utf8_cells2bytes(unsigned char *string, int max_cells, unsigned char *end)
{
	/* Signed counters: @cells is compared with the signed @max_cells and
	 * @bytes is the (signed) return value. */
	int bytes = 0, cells = 0;

	assert(max_cells >= 0);

	/* Reject a NULL string before strchr() gets a chance to
	 * dereference it. */
	if (!string)
		return -1;

	if (end == NULL)
		end = (unsigned char *) strchr((const char *) string, '\0');

	/* Stop as soon as @end is reached so that nothing at or past @end
	 * is decoded. */
	while (string + bytes < end) {
		int cell = utf8_char2cells(&string[bytes], end);

		if (cell < 0)
			return -1;

		/* The character is accepted only if it still fits. */
		cells += cell;
		if (cells > max_cells)
			break;

		bytes += utf8charlen(&string[bytes]);

		/* The last character claimed more bytes than are available;
		 * clamp the result to the end of the range. */
		if (string + bytes > end) {
			bytes = (int) (end - string);
			break;
		}
	}

	return bytes;
}
/* /*
* Find out number of standard terminal collumns needed for displaying symbol * Find out number of standard terminal collumns needed for displaying symbol

View File

@ -56,6 +56,9 @@ void free_conv_table(void);
#ifdef CONFIG_UTF_8 #ifdef CONFIG_UTF_8
inline unsigned char *encode_utf_8(unicode_val_T); inline unsigned char *encode_utf_8(unicode_val_T);
inline int utf8charlen(const unsigned char *); inline int utf8charlen(const unsigned char *);
/* Terminal cells needed for the single UTF-8 character at the first
 * argument. The second argument is the end of the input; NULL means the
 * terminating NUL byte of the string. */
int utf8_char2cells(unsigned char *, unsigned char *);
/* Terminal cells needed for the whole UTF-8 string between the first
 * argument and the second (end; NULL means the terminating NUL byte). */
int utf8_ptr2cells(unsigned char *, unsigned char *);
/* Number of bytes from the start of the string (first argument) that can
 * be displayed in the given number of cells (second argument). The third
 * argument is the end of the input; NULL means the terminating NUL byte. */
int utf8_cells2bytes(unsigned char *, int, unsigned char *);
inline int unicode_to_cell(unicode_val_T); inline int unicode_to_cell(unicode_val_T);
inline int strlen_utf8(unsigned char **); inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *); inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);