1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-09-29 03:17:53 -04:00

Added UTF-8 char length lookup table

Added a lookup table to quickly get the number of bytes in a UTF-8
character from its first byte.
This commit is contained in:
Pavol Babincak 2006-01-31 01:09:49 +01:00 committed by Jonas Fonseca
parent 259a64a7a7
commit 0bacd766e2
2 changed files with 27 additions and 18 deletions

View File

@ -168,6 +168,21 @@ u2cp_(unicode_val_T u, int to, int no_nbsp_hack)
return no_str;
}
/* Length in bytes of a UTF-8 character, indexed by its first byte.
 *
 * Lead bytes 0xC0-0xFD map to sequence lengths 2 through 6.  Bytes that
 * cannot begin a sequence (the continuation bytes 0x80-0xBF, and 0xFE and
 * 0xFF) are given length 1, the same as ASCII; the code using the table is
 * expected to handle such illegal bytes separately.
 *
 * The table is only ever read, so it is const. */
static const unsigned char utf8char_len_tab[256] = {
	/* 0x00-0x7F: single-byte (ASCII) characters */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x80-0xBF: continuation bytes, not valid as a first byte */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0xC0-0xDF: two-byte sequences */
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	/* 0xE0-0xEF: three; 0xF0-0xF7: four; 0xF8-0xFB: five;
	 * 0xFC-0xFD: six; 0xFE-0xFF: not valid as a first byte */
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};
/* Static buffer that encode_utf_8() returns a pointer to. */
static unsigned char utf_buffer[7];
inline unsigned char *
@ -205,6 +220,15 @@ encode_utf_8(unicode_val_T u)
return utf_buffer;
}
/* Return the number of bytes occupied by the UTF-8 character whose first
 * byte is at @p, as looked up in utf8char_len_tab.  A NULL @p yields 0. */
inline int utf8charlen(const unsigned char *p)
{
	return p ? utf8char_len_tab[*p] : 0;
}
inline int
strlen_utf8(unsigned char **str)
{
@ -214,12 +238,7 @@ strlen_utf8(unsigned char **str)
int len;
for (x = 0;; x++, s += len) {
if (*s < 0x80) len = 1;
else if (*s < 0xe0) len = 2;
else if (*s < 0xf0) len = 3;
else if (*s < 0xf8) len = 4;
else if (*s < 0xfc) len = 5;
else len = 6;
len = utf8charlen(s);
if (s + len > end) break;
}
*str = s;
@ -233,18 +252,7 @@ utf_8_to_unicode(unsigned char **string, unsigned char *end)
unicode_val_T u;
int length;
if (str[0] < 0x80)
length = 1;
else if (str[0] < 0xe0)
length = 2;
else if (str[0] < 0xf0)
length = 3;
else if (str[0] < 0xf8)
length = 4;
else if (str[0] < 0xfc)
length = 5;
else
length = 6;
length = utf8char_len_tab[str[0]];
if (str + length > end) {
return UCS_NO_CHAR;

View File

@ -54,6 +54,7 @@ unsigned char *get_cp_mime_name(int);
int is_cp_special(int);
void free_conv_table(void);
inline unsigned char *encode_utf_8(unicode_val_T);
/* Returns the length in bytes of the UTF-8 character starting at the given
 * pointer, determined from its first byte; returns 0 for a NULL pointer. */
inline int utf8charlen(const unsigned char *);
inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);