1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

Support for multibyte encodings using iconv.

New charsets must be added by analogy to Big5.
This commit is contained in:
Witold Filipczyk 2010-07-23 19:59:59 +02:00 committed by Witold Filipczyk
parent b50fd4e8e2
commit 6d567bb8ce
3 changed files with 126 additions and 2 deletions

View File

@ -18,6 +18,11 @@
#include <wctype.h> #include <wctype.h>
#endif #endif
#ifdef HAVE_ICONV
#include <errno.h>
#include <iconv.h>
#endif
#include "elinks.h" #include "elinks.h"
#include "document/options.h" #include "document/options.h"
@ -122,6 +127,10 @@ static const char strings[256][2] = {
"\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377", "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377",
}; };
#ifdef HAVE_ICONV
/* Conversion descriptor shared by the iconv-based code in this file.
 * (iconv_t)-1 is the "no descriptor open" sentinel: convert_string() opens
 * the descriptor on demand and free_conv_table() closes it and restores
 * the sentinel. */
static iconv_t iconv_cd = (iconv_t)-1;
#endif
static void static void
free_translation_table(struct conv_table *p) free_translation_table(struct conv_table *p)
{ {
@ -157,6 +166,7 @@ new_translation_table(struct conv_table *p)
p[i].t = 0; p[i].t = 0;
p[i].u.str = no_str; p[i].u.str = no_str;
} }
p->iconv_cp = -1;
} }
#define BIN_SEARCH(table, entry, entries, key, result) \ #define BIN_SEARCH(table, entry, entries, key, result) \
@ -952,6 +962,12 @@ free_conv_table(void)
first = 0; first = 0;
} }
new_translation_table(table); new_translation_table(table);
#ifdef HAVE_ICONV
/* Release the shared iconv descriptor if one is currently open, and put
 * the (iconv_t)-1 sentinel back so it is never closed twice. */
if (iconv_cd != (iconv_t)-1) {
iconv_close(iconv_cd);
iconv_cd = (iconv_t)-1;
}
#endif
} }
@ -967,6 +983,14 @@ get_translation_table(int from, int to)
memset(table, 0, sizeof(table)); memset(table, 0, sizeof(table));
first = 0; first = 0;
} }
/* Charsets flagged for iconv have no static tables.  Hand back the
 * "to UTF-8" table and tag it with the source codepage so that
 * convert_string() runs the input through iconv first.
 *
 * `from` is range-checked here because this test runs before the
 * `from == -1` guard below; indexing codepages[] with a negative value
 * would read out of bounds.
 *
 * NOTE(review): 34 is presumably the index of the "Unicode UTF-8" entry in
 * codepages[] (the entry just before Big5) -- confirm, and prefer a named
 * lookup over the literal. */
if (from >= 0 && codepages[from].iconv) {
	struct conv_table *utf8_table = get_translation_table_to_utf8(34);

	if (utf8_table) utf8_table->iconv_cp = from;
	return utf8_table;
}
if (/*from == to ||*/ from == -1 || to == -1) if (/*from == to ||*/ from == -1 || to == -1)
return NULL; return NULL;
if (is_cp_ptr_utf8(&codepages[to])) if (is_cp_ptr_utf8(&codepages[to]))
@ -1242,7 +1266,7 @@ end:
unsigned char * unsigned char *
convert_string(struct conv_table *convert_table, convert_string(struct conv_table *convert_table,
unsigned char *chars, int charslen, int cp, unsigned char *chars2, int charslen2, int cp,
enum convert_string_mode mode, int *length, enum convert_string_mode mode, int *length,
void (*callback)(void *data, unsigned char *buf, int buflen), void (*callback)(void *data, unsigned char *buf, int buflen),
void *callback_data) void *callback_data)
@ -1250,6 +1274,19 @@ convert_string(struct conv_table *convert_table,
unsigned char *buffer; unsigned char *buffer;
int bufferpos = 0; int bufferpos = 0;
int charspos = 0; int charspos = 0;
unsigned char *chars = chars2;
int charslen = charslen2;
#ifdef HAVE_ICONV
static char iconv_input[256];
static char iconv_output[256 * 8];
static size_t iconv_offset;
static int iconv_cp;
static size_t iconv_inleft;
size_t iconv_outleft = 256 * 8;
int loop = 0;
int is_iconv = 0;
int chars_offset = 0;
if (!convert_table && !memchr(chars, '&', charslen)) { if (!convert_table && !memchr(chars, '&', charslen)) {
if (callback) { if (callback) {
@ -1260,11 +1297,81 @@ convert_string(struct conv_table *convert_table,
} }
} }
if (cp >= 0) {
if (convert_table && convert_table->iconv_cp > 0) {
is_iconv = 1;
cp = convert_table->iconv_cp;
} else {
is_iconv = codepages[cp & ~SYSTEM_CHARSET_FLAG].iconv;
}
}
#endif
/* Buffer allocation */ /* Buffer allocation */
buffer = mem_alloc(ALLOC_GR + 1 /* trailing \0 */); buffer = mem_alloc(ALLOC_GR + 1 /* trailing \0 */);
if (!buffer) return NULL; if (!buffer) return NULL;
#ifdef HAVE_ICONV
	/*
	 * iconv pre-conversion stage.
	 *
	 * The caller's input (chars2/charslen2) is fed to iconv in chunks of at
	 * most sizeof(iconv_input) bytes.  Each chunk is converted to UTF-8 into
	 * iconv_output, and chars/charslen/charspos are redirected to that
	 * buffer so the regular conversion loop below processes the UTF-8 text.
	 * When `loop` is non-zero the code after that loop jumps back to
	 * `repeat` for the next chunk.
	 *
	 * chars_offset counts the bytes of chars2 already handed to iconv.
	 * iconv_offset counts the bytes carried over at the front of
	 * iconv_input: an incomplete multibyte sequence (EINVAL) or input that
	 * did not fit the output buffer (E2BIG).  Both statics survive across
	 * calls so a sequence split between two calls is reassembled.
	 */
	if (is_iconv) {
		size_t v;
		size_t to_copy;
		char *outp, *inp;

		/* iconv_t is opaque: only compare it against the (iconv_t)-1
		 * sentinel, never order it against 0. */
		if (iconv_cd != (iconv_t)-1 && cp != iconv_cp) {
			iconv_close(iconv_cd);
			iconv_cd = (iconv_t)-1;
		}
		if (iconv_cd == (iconv_t)-1) {
			iconv_offset = 0;
			iconv_cd = iconv_open("utf-8", get_cp_mime_name(cp));
			if (iconv_cd == (iconv_t)-1) {
				mem_free(buffer);
				return NULL;
			}
			iconv_cp = cp;
		}
repeat:
		/* No initializers on the locals above: `goto repeat` enters this
		 * block from outside, so everything is (re)assigned here. */
		to_copy = charslen2 - chars_offset;
		if (to_copy > sizeof(iconv_input) - iconv_offset)
			to_copy = sizeof(iconv_input) - iconv_offset;
		/* Always read from the caller's buffer: `chars` is redirected to
		 * iconv_output below and must not be used as the source here. */
		memcpy(iconv_input + iconv_offset, chars2 + chars_offset, to_copy);
		chars_offset += to_copy;

		iconv_inleft = iconv_offset + to_copy;
		iconv_outleft = sizeof(iconv_output);
		iconv_offset = 0;
		inp = iconv_input;
		outp = iconv_output;
		loop = 0;
again:
		v = iconv(iconv_cd, &inp, &iconv_inleft, &outp, &iconv_outleft);
		if (v == (size_t)-1) {
			switch (errno) {
			case EILSEQ:
				/* Invalid byte: skip it and convert the rest. */
				if (iconv_inleft > 0) {
					inp++;
					iconv_inleft--;
					goto again;
				}
				break;
			case EINVAL:
				/* Incomplete sequence at the end of the chunk: keep
				 * it for the next chunk or call.  A tail that fills
				 * the whole buffer is dropped so progress is
				 * guaranteed. */
				if (iconv_inleft < sizeof(iconv_input)) {
					memmove(iconv_input, inp, iconv_inleft);
					iconv_offset = iconv_inleft;
				}
				break;
			case E2BIG:
				/* Output buffer full: keep the unconverted input and
				 * come back once the output has been processed.  If
				 * nothing was produced, drop the input instead of
				 * spinning. */
				if (outp != iconv_output) {
					memmove(iconv_input, inp, iconv_inleft);
					iconv_offset = iconv_inleft;
					loop = 1;
				}
				break;
			default:
				/* Unexpected failure: discard the rest of the chunk. */
				break;
			}
		}

		/* Point the regular conversion loop at the UTF-8 output. */
		charslen = sizeof(iconv_output) - iconv_outleft;
		chars = (unsigned char *)iconv_output;
		charspos = 0;

		if (chars_offset < charslen2) loop = 1;
	}
#endif
/* Iterate ;-) */ /* Iterate ;-) */
while (charspos < charslen) { while (charspos < charslen) {
@ -1359,6 +1466,9 @@ flush:
#undef PUTC #undef PUTC
} }
#ifdef HAVE_ICONV
if (loop) goto repeat;
#endif
/* Say bye */ /* Say bye */
buffer[bufferpos] = 0; buffer[bufferpos] = 0;

View File

@ -74,6 +74,7 @@ struct conv_table {
* table owns the nested conversion table. */ * table owns the nested conversion table. */
struct conv_table *tbl; struct conv_table *tbl;
} u; } u;
int iconv_cp;
}; };
enum convert_string_mode { enum convert_string_mode {

View File

@ -4884,6 +4884,18 @@ unsigned char *const aliases_utf8 [] = {
NULL NULL
}; };
/*** Big5 ***/

/* Big5 ships no built-in conversion data: both table names are aliased to
 * the NULL placeholders, and only the list of accepted charset names is
 * defined here.  Its codepages[] entry ends in 1 where the table-driven
 * charsets end in 0 (presumably the iconv flag -- confirm against
 * struct codepage_desc). */
#define highhalf_big5 highhalf_NULL
#define table_big5 table_NULL
/* NULL-terminated list of names recognised for this charset. */
unsigned char *const aliases_big5 [] = {
"Big5",
NULL
};
/*** NULL ***/ /*** NULL ***/
@ -4947,7 +4959,8 @@ const struct codepage_desc codepages [] = {
{"TCVN-5712", aliases_tcvn5712, highhalf_tcvn5712, table_tcvn5712, 0}, {"TCVN-5712", aliases_tcvn5712, highhalf_tcvn5712, table_tcvn5712, 0},
{"VISCII", aliases_viscii, highhalf_viscii, table_viscii, 0}, {"VISCII", aliases_viscii, highhalf_viscii, table_viscii, 0},
{"Unicode UTF-8", aliases_utf8, highhalf_utf8, table_utf8, 0}, {"Unicode UTF-8", aliases_utf8, highhalf_utf8, table_utf8, 0},
{"Big5", aliases_big5, highhalf_big5, table_big5, 1},
{NULL, NULL, NULL, 0} {NULL, NULL, NULL, 0}
}; };
#define N_CODEPAGES 35 #define N_CODEPAGES 36