mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
combined: Added combining characters support.
Combining characters require a UTF-8 locale. Supporting them slows down rendering. There is still an unresolved issue with combining characters at the end of a document. This patch wasn't heavily tested; in particular, "garbage" input may cause unpredictable results.
This commit is contained in:
parent
560818568a
commit
83a4d815ae
@ -57,6 +57,10 @@ init_document(struct cache_entry *cached, struct document_options *options)
|
|||||||
init_list(document->onload_snippets);
|
init_list(document->onload_snippets);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_UTF8
|
||||||
|
document->comb_x = -1;
|
||||||
|
document->comb_y = -1;
|
||||||
|
#endif
|
||||||
object_nolock(document, "document");
|
object_nolock(document, "document");
|
||||||
object_lock(document);
|
object_lock(document);
|
||||||
|
|
||||||
|
@ -201,6 +201,12 @@ struct document {
|
|||||||
#ifdef CONFIG_UTF8
|
#ifdef CONFIG_UTF8
|
||||||
unsigned char buf[7];
|
unsigned char buf[7];
|
||||||
unsigned char buf_length;
|
unsigned char buf_length;
|
||||||
|
/* base char + 5 combining chars = 6 */
|
||||||
|
unicode_val_T combi[UCS_MAX_LENGTH_COMBINED];
|
||||||
|
/* the number of combining characters. The base char is not counted. */
|
||||||
|
unsigned int combi_length;
|
||||||
|
/* Position (x, y) of the last base character. */
|
||||||
|
int comb_x, comb_y;
|
||||||
#endif
|
#endif
|
||||||
unsigned int id; /**< Used to check cache entries. */
|
unsigned int id; /**< Used to check cache entries. */
|
||||||
|
|
||||||
|
@ -8,6 +8,11 @@
|
|||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCWIDTH)
|
||||||
|
#define __USE_XOPEN
|
||||||
|
#include <wchar.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "elinks.h"
|
#include "elinks.h"
|
||||||
|
|
||||||
#include "cache/cache.h"
|
#include "cache/cache.h"
|
||||||
@ -393,6 +398,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
* has mapped those characters to NBSP_CHAR. */
|
* has mapped those characters to NBSP_CHAR. */
|
||||||
|
|
||||||
if (part->document) {
|
if (part->document) {
|
||||||
|
struct document *const document = part->document;
|
||||||
/* Reallocate LINE(y).chars[] to large enough. The
|
/* Reallocate LINE(y).chars[] to large enough. The
|
||||||
* last parameter of realloc_line is the index of the
|
* last parameter of realloc_line is the index of the
|
||||||
* last element to which we may want to write,
|
* last element to which we may want to write,
|
||||||
@ -402,10 +408,10 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
* (All double-cell characters take up at least two
|
* (All double-cell characters take up at least two
|
||||||
* bytes in UTF-8, and there are no triple-cell or
|
* bytes in UTF-8, and there are no triple-cell or
|
||||||
* wider characters.) However, if there already is an
|
* wider characters.) However, if there already is an
|
||||||
* incomplete character in part->document->buf, then
|
* incomplete character in document->buf, then
|
||||||
* the first byte of input can result in a double-cell
|
* the first byte of input can result in a double-cell
|
||||||
* character, so we must reserve one extra element. */
|
* character, so we must reserve one extra element. */
|
||||||
orig_length = realloc_line(html_context, part->document,
|
orig_length = realloc_line(html_context, document,
|
||||||
Y(y), X(x) + charslen);
|
Y(y), X(x) + charslen);
|
||||||
if (orig_length < 0) /* error */
|
if (orig_length < 0) /* error */
|
||||||
return 0;
|
return 0;
|
||||||
@ -413,17 +419,17 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
unsigned char *const end = chars + charslen;
|
unsigned char *const end = chars + charslen;
|
||||||
unicode_val_T data;
|
unicode_val_T data;
|
||||||
|
|
||||||
if (part->document->buf_length) {
|
if (document->buf_length) {
|
||||||
/* previous char was broken in the middle */
|
/* previous char was broken in the middle */
|
||||||
int length = utf8charlen(part->document->buf);
|
int length = utf8charlen(document->buf);
|
||||||
unsigned char i;
|
unsigned char i;
|
||||||
unsigned char *buf_ptr = part->document->buf;
|
unsigned char *buf_ptr = document->buf;
|
||||||
|
|
||||||
for (i = part->document->buf_length; i < length && chars < end;) {
|
for (i = document->buf_length; i < length && chars < end;) {
|
||||||
part->document->buf[i++] = *chars++;
|
document->buf[i++] = *chars++;
|
||||||
}
|
}
|
||||||
part->document->buf_length = i;
|
document->buf_length = i;
|
||||||
part->document->buf[i] = '\0';
|
document->buf[i] = '\0';
|
||||||
data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
|
data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
|
||||||
if (data != UCS_NO_CHAR) {
|
if (data != UCS_NO_CHAR) {
|
||||||
/* FIXME: If there was invalid
|
/* FIXME: If there was invalid
|
||||||
@ -436,7 +442,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
* trivial to implement because
|
* trivial to implement because
|
||||||
* each byte may have arrived in
|
* each byte may have arrived in
|
||||||
* a separate call. */
|
* a separate call. */
|
||||||
part->document->buf_length = 0;
|
document->buf_length = 0;
|
||||||
goto good_char;
|
goto good_char;
|
||||||
} else {
|
} else {
|
||||||
/* Still not full char */
|
/* Still not full char */
|
||||||
@ -465,9 +471,9 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
unsigned char i;
|
unsigned char i;
|
||||||
|
|
||||||
for (i = 0; chars < end;i++) {
|
for (i = 0; chars < end;i++) {
|
||||||
part->document->buf[i] = *chars++;
|
document->buf[i] = *chars++;
|
||||||
}
|
}
|
||||||
part->document->buf_length = i;
|
document->buf_length = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* not reached */
|
/* not reached */
|
||||||
@ -480,6 +486,27 @@ good_char:
|
|||||||
if (data == UCS_NO_BREAK_SPACE
|
if (data == UCS_NO_BREAK_SPACE
|
||||||
&& html_context->options->wrap_nbsp)
|
&& html_context->options->wrap_nbsp)
|
||||||
data = UCS_SPACE;
|
data = UCS_SPACE;
|
||||||
|
#ifdef HAVE_WCWIDTH
|
||||||
|
if (wcwidth((wchar_t)data)) {
|
||||||
|
if (document->combi_length) {
|
||||||
|
if (document->comb_x != -1) {
|
||||||
|
unicode_val_T prev = get_combined(document->combi, document->combi_length + 1);
|
||||||
|
|
||||||
|
if (prev != UCS_NO_CHAR) {
|
||||||
|
schar->data = prev;
|
||||||
|
copy_screen_chars(&POS(document->comb_x, document->comb_y), schar, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
document->combi_length = 0;
|
||||||
|
}
|
||||||
|
document->combi[0] = data;
|
||||||
|
} else {
|
||||||
|
if (document->combi_length < (UCS_MAX_LENGTH_COMBINED - 1)) {
|
||||||
|
document->combi[++document->combi_length] = data;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
part->spaces[x] = (data == UCS_SPACE);
|
part->spaces[x] = (data == UCS_SPACE);
|
||||||
|
|
||||||
if (unicode_to_cell(data) == 2) {
|
if (unicode_to_cell(data) == 2) {
|
||||||
@ -493,6 +520,10 @@ good_char:
|
|||||||
part->char_width[x] = unicode_to_cell(data);
|
part->char_width[x] = unicode_to_cell(data);
|
||||||
schar->data = (unicode_val_T)data;
|
schar->data = (unicode_val_T)data;
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_WCWIDTH
|
||||||
|
document->comb_x = x;
|
||||||
|
document->comb_y = y;
|
||||||
|
#endif
|
||||||
copy_screen_chars(&POS(x++, y), schar, 1);
|
copy_screen_chars(&POS(x++, y), schar, 1);
|
||||||
} /* while chars < end */
|
} /* while chars < end */
|
||||||
} else { /* not UTF-8 */
|
} else { /* not UTF-8 */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user