1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

combined: Added combining characters support.

Combining characters requires a UTF-8 locale.
It slows down rendering. There is still the unresolved issue with
combining characters at the end of a document.
This patch wasn't heavilly tested. Especially a "garbage" input may cause
unpredictable results.
This commit is contained in:
Witold Filipczyk 2008-01-03 13:03:08 +01:00 committed by Kalle Olavi Niemitalo
parent 560818568a
commit 83a4d815ae
3 changed files with 53 additions and 12 deletions

View File

@ -57,6 +57,10 @@ init_document(struct cache_entry *cached, struct document_options *options)
init_list(document->onload_snippets);
#endif
#ifdef CONFIG_UTF8
document->comb_x = -1;
document->comb_y = -1;
#endif
object_nolock(document, "document");
object_lock(document);

View File

@ -201,6 +201,12 @@ struct document {
#ifdef CONFIG_UTF8
unsigned char buf[7];
unsigned char buf_length;
/* base char + 5 combining chars = 6 */
unicode_val_T combi[UCS_MAX_LENGTH_COMBINED];
/* the number of combining characters. The base char is not counted. */
unsigned int combi_length;
/* Positions of the last base character.*/
int comb_x, comb_y;
#endif
unsigned int id; /**< Used to check cache entries. */

View File

@ -8,6 +8,11 @@
#include <stdarg.h>
#include <string.h>
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
#define __USE_XOPEN
#include <wchar.h>
#endif
#include "elinks.h"
#include "cache/cache.h"
@ -393,6 +398,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
* has mapped those characters to NBSP_CHAR. */
if (part->document) {
struct document *const document = part->document;
/* Reallocate LINE(y).chars[] to large enough. The
* last parameter of realloc_line is the index of the
* last element to which we may want to write,
@ -402,10 +408,10 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
* (All double-cell characters take up at least two
* bytes in UTF-8, and there are no triple-cell or
* wider characters.) However, if there already is an
* incomplete character in part->document->buf, then
* incomplete character in document->buf, then
* the first byte of input can result in a double-cell
* character, so we must reserve one extra element. */
orig_length = realloc_line(html_context, part->document,
orig_length = realloc_line(html_context, document,
Y(y), X(x) + charslen);
if (orig_length < 0) /* error */
return 0;
@ -413,17 +419,17 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
unsigned char *const end = chars + charslen;
unicode_val_T data;
if (part->document->buf_length) {
if (document->buf_length) {
/* previous char was broken in the middle */
int length = utf8charlen(part->document->buf);
int length = utf8charlen(document->buf);
unsigned char i;
unsigned char *buf_ptr = part->document->buf;
unsigned char *buf_ptr = document->buf;
for (i = part->document->buf_length; i < length && chars < end;) {
part->document->buf[i++] = *chars++;
for (i = document->buf_length; i < length && chars < end;) {
document->buf[i++] = *chars++;
}
part->document->buf_length = i;
part->document->buf[i] = '\0';
document->buf_length = i;
document->buf[i] = '\0';
data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
if (data != UCS_NO_CHAR) {
/* FIXME: If there was invalid
@ -436,7 +442,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
* trivial to implement because
* each byte may have arrived in
* a separate call. */
part->document->buf_length = 0;
document->buf_length = 0;
goto good_char;
} else {
/* Still not full char */
@ -465,9 +471,9 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
unsigned char i;
for (i = 0; chars < end;i++) {
part->document->buf[i] = *chars++;
document->buf[i] = *chars++;
}
part->document->buf_length = i;
document->buf_length = i;
break;
}
/* not reached */
@ -480,6 +486,27 @@ good_char:
if (data == UCS_NO_BREAK_SPACE
&& html_context->options->wrap_nbsp)
data = UCS_SPACE;
#ifdef HAVE_WCWIDTH
if (wcwidth((wchar_t)data)) {
if (document->combi_length) {
if (document->comb_x != -1) {
unicode_val_T prev = get_combined(document->combi, document->combi_length + 1);
if (prev != UCS_NO_CHAR) {
schar->data = prev;
copy_screen_chars(&POS(document->comb_x, document->comb_y), schar, 1);
}
}
document->combi_length = 0;
}
document->combi[0] = data;
} else {
if (document->combi_length < (UCS_MAX_LENGTH_COMBINED - 1)) {
document->combi[++document->combi_length] = data;
}
continue;
}
#endif
part->spaces[x] = (data == UCS_SPACE);
if (unicode_to_cell(data) == 2) {
@ -493,6 +520,10 @@ good_char:
part->char_width[x] = unicode_to_cell(data);
schar->data = (unicode_val_T)data;
}
#ifdef HAVE_WCWIDTH
document->comb_x = x;
document->comb_y = y;
#endif
copy_screen_chars(&POS(x++, y), schar, 1);
} /* while chars < end */
} else { /* not UTF-8 */