mirror of
https://github.com/rkd77/elinks.git
synced 2025-06-30 22:19:29 -04:00
Added UTF-8 char length lookup table
Added a lookup table to quickly get the number of bytes in a UTF-8 character from its first byte.
This commit is contained in:
parent
259a64a7a7
commit
0bacd766e2
@ -168,6 +168,21 @@ u2cp_(unicode_val_T u, int to, int no_nbsp_hack)
|
|||||||
return no_str;
|
return no_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Length in bytes of a UTF-8 character, indexed by its first byte.
 *
 * Bytes that cannot start a character (0x80-0xBF, 0xFE and 0xFF) are given
 * a length of 1, the same as ASCII; callers that care about illegal input
 * must detect and handle those bytes separately.
 *
 * Lead bytes 0xF8-0xFD map to the 5- and 6-byte forms.
 *
 * The table is read-only, and its entries are unsigned so the values never
 * depend on the platform's signedness of plain char. */
static const unsigned char utf8char_len_tab[256] =
{
	/* 0x00-0x1F */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x20-0x3F */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x40-0x5F */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x60-0x7F */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x80-0x9F: not valid as a first byte */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0xA0-0xBF: not valid as a first byte */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0xC0-0xDF: two-byte characters */
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	/* 0xE0-0xEF: three, 0xF0-0xF7: four, 0xF8-0xFB: five,
	 * 0xFC-0xFD: six, 0xFE-0xFF: not valid as a first byte */
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};
|
||||||
|
|
||||||
static unsigned char utf_buffer[7];
|
static unsigned char utf_buffer[7];
|
||||||
|
|
||||||
inline unsigned char *
|
inline unsigned char *
|
||||||
@ -205,6 +220,15 @@ encode_utf_8(unicode_val_T u)
|
|||||||
return utf_buffer;
|
return utf_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int utf8charlen(const unsigned char *p)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
if (p==NULL)
|
||||||
|
return 0;
|
||||||
|
len = utf8char_len_tab[*p];
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
inline int
|
inline int
|
||||||
strlen_utf8(unsigned char **str)
|
strlen_utf8(unsigned char **str)
|
||||||
{
|
{
|
||||||
@ -214,12 +238,7 @@ strlen_utf8(unsigned char **str)
|
|||||||
int len;
|
int len;
|
||||||
|
|
||||||
for (x = 0;; x++, s += len) {
|
for (x = 0;; x++, s += len) {
|
||||||
if (*s < 0x80) len = 1;
|
len = utf8charlen(s);
|
||||||
else if (*s < 0xe0) len = 2;
|
|
||||||
else if (*s < 0xf0) len = 3;
|
|
||||||
else if (*s < 0xf8) len = 4;
|
|
||||||
else if (*s < 0xfc) len = 5;
|
|
||||||
else len = 6;
|
|
||||||
if (s + len > end) break;
|
if (s + len > end) break;
|
||||||
}
|
}
|
||||||
*str = s;
|
*str = s;
|
||||||
@ -233,18 +252,7 @@ utf_8_to_unicode(unsigned char **string, unsigned char *end)
|
|||||||
unicode_val_T u;
|
unicode_val_T u;
|
||||||
int length;
|
int length;
|
||||||
|
|
||||||
if (str[0] < 0x80)
|
length = utf8char_len_tab[str[0]];
|
||||||
length = 1;
|
|
||||||
else if (str[0] < 0xe0)
|
|
||||||
length = 2;
|
|
||||||
else if (str[0] < 0xf0)
|
|
||||||
length = 3;
|
|
||||||
else if (str[0] < 0xf8)
|
|
||||||
length = 4;
|
|
||||||
else if (str[0] < 0xfc)
|
|
||||||
length = 5;
|
|
||||||
else
|
|
||||||
length = 6;
|
|
||||||
|
|
||||||
if (str + length > end) {
|
if (str + length > end) {
|
||||||
return UCS_NO_CHAR;
|
return UCS_NO_CHAR;
|
||||||
|
@ -54,6 +54,7 @@ unsigned char *get_cp_mime_name(int);
|
|||||||
int is_cp_special(int);
|
int is_cp_special(int);
|
||||||
void free_conv_table(void);
|
void free_conv_table(void);
|
||||||
inline unsigned char *encode_utf_8(unicode_val_T);
|
inline unsigned char *encode_utf_8(unicode_val_T);
|
||||||
|
inline int utf8charlen(const unsigned char *);
|
||||||
inline int strlen_utf8(unsigned char **);
|
inline int strlen_utf8(unsigned char **);
|
||||||
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
|
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user