1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

Bug 879, set_hline: Discard U+00AD SOFT HYPHEN characters if UTF-8.

If the input is not UTF-8, then charsets.c has already discarded them.
This commit is contained in:
Kalle Olavi Niemitalo 2007-04-26 07:12:28 +03:00 committed by Kalle Olavi Niemitalo
parent 70dc594d93
commit 0c3a871a4b

View File

@ -406,6 +406,21 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
if (realloc_spaces(part, x + charslen)) if (realloc_spaces(part, x + charslen))
return 0; return 0;
/* U+00AD SOFT HYPHEN characters in HTML documents are
* supposed to be displayed only if the word is broken at that
* point. ELinks currently does not use them, so it should
* not display them. If the input @chars is in UTF-8, then
* set_hline() discards the characters. If the input is in
* some other charset, then set_hline() does not know which
* byte that charset uses for U+00AD, so it cannot discard
* the characters; instead, the translation table used by
* convert_string() has already discarded the characters.
*
* Likewise, if the input @chars is in UTF-8, then it may
* contain U+00A0 NO-BREAK SPACE characters; but if the input
* is in some other charset, then the translation table
* has mapped those characters to NBSP_CHAR. */
if (part->document) { if (part->document) {
/* Reallocate LINE(y).chars[] to be large enough. The /* Reallocate LINE(y).chars[] to be large enough. The
* last parameter of realloc_line is the index of the * last parameter of realloc_line is the index of the
@ -424,7 +439,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
if (orig_length < 0) /* error */ if (orig_length < 0) /* error */
return 0; return 0;
if (utf8) { if (utf8) {
unsigned char *end = chars + charslen; unsigned char *const end = chars + charslen;
unicode_val_T data; unicode_val_T data;
if (part->document->buf_length) { if (part->document->buf_length) {
@ -459,7 +474,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
} }
} }
for (; chars < end; x++) { while (chars < end) {
/* ELinks does not use NBSP_CHAR in UTF-8. */ /* ELinks does not use NBSP_CHAR in UTF-8. */
data = utf8_to_unicode(&chars, end); data = utf8_to_unicode(&chars, end);
@ -473,7 +488,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
schar->attr = SCREEN_ATTR_FRAME; schar->attr = SCREEN_ATTR_FRAME;
copy_screen_chars(&POS(x, y), schar, 1); copy_screen_chars(&POS(x, y), schar, 1);
schar->attr = attr; schar->attr = attr;
part->char_width[x] = 0; part->char_width[x++] = 0;
continue; continue;
} else { } else {
unsigned char i; unsigned char i;
@ -486,6 +501,8 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
} }
} else { } else {
good_char: good_char:
if (data == UCS_SOFT_HYPHEN)
continue;
if (data == UCS_NO_BREAK_SPACE if (data == UCS_NO_BREAK_SPACE
&& html_context->options->wrap_nbsp) && html_context->options->wrap_nbsp)
data = UCS_SPACE; data = UCS_SPACE;
@ -502,8 +519,8 @@ good_char:
schar->data = (unicode_val_T)data; schar->data = (unicode_val_T)data;
} }
} }
copy_screen_chars(&POS(x, y), schar, 1); copy_screen_chars(&POS(x++, y), schar, 1);
} } /* while chars < end */
} else { /* not UTF-8 */ } else { /* not UTF-8 */
for (; charslen > 0; charslen--, x++, chars++) { for (; charslen > 0; charslen--, x++, chars++) {
part->char_width[x] = 1; part->char_width[x] = 1;
@ -535,13 +552,15 @@ good_char:
len = x - x2; len = x - x2;
} else { /* part->document == NULL */ } else { /* part->document == NULL */
if (utf8) { if (utf8) {
unsigned char *end; unsigned char *const end = chars + charslen;
for (end = chars + charslen; chars < end; x++) { while (chars < end) {
unicode_val_T data; unicode_val_T data;
part->spaces[x] = (*chars == ' ');
data = utf8_to_unicode(&chars, end); data = utf8_to_unicode(&chars, end);
if (data == UCS_SOFT_HYPHEN)
continue;
part->spaces[x] = (data == UCS_SPACE);
part->char_width[x] = unicode_to_cell(data); part->char_width[x] = unicode_to_cell(data);
if (part->char_width[x] == 2) { if (part->char_width[x] == 2) {
x++; x++;
@ -552,7 +571,8 @@ good_char:
/* this is at the end only */ /* this is at the end only */
return x - x2; return x - x2;
} }
} x++;
} /* while chars < end */
len = x - x2; len = x - x2;
} else { /* not UTF-8 */ } else { /* not UTF-8 */
for (; charslen > 0; charslen--, x++, chars++) { for (; charslen > 0; charslen--, x++, chars++) {