mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
Bug 879, set_hline: Discard U+00AD SOFT HYPHEN characters if UTF-8.
If not UTF-8, then charsets.c has already discarded them.
This commit is contained in:
parent
70dc594d93
commit
0c3a871a4b
@ -406,6 +406,21 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
if (realloc_spaces(part, x + charslen))
|
if (realloc_spaces(part, x + charslen))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/* U+00AD SOFT HYPHEN characters in HTML documents are
|
||||||
|
* supposed to be displayed only if the word is broken at that
|
||||||
|
* point. ELinks currently does not use them, so it should
|
||||||
|
* not display them. If the input @chars is in UTF-8, then
|
||||||
|
* set_hline() discards the characters. If the input is in
|
||||||
|
* some other charset, then set_hline() does not know which
|
||||||
|
* byte that charset uses for U+00AD, so it cannot discard
|
||||||
|
* the characters; instead, the translation table used by
|
||||||
|
* convert_string() has already discarded the characters.
|
||||||
|
*
|
||||||
|
* Likewise, if the input @chars is in UTF-8, then it may
|
||||||
|
* contain U+00A0 NO-BREAK SPACE characters; but if the input
|
||||||
|
* is in some other charset, then the translation table
|
||||||
|
* has mapped those characters to NBSP_CHAR. */
|
||||||
|
|
||||||
if (part->document) {
|
if (part->document) {
|
||||||
/* Reallocate LINE(y).chars[] to large enough. The
|
/* Reallocate LINE(y).chars[] to large enough. The
|
||||||
* last parameter of realloc_line is the index of the
|
* last parameter of realloc_line is the index of the
|
||||||
@ -424,7 +439,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
if (orig_length < 0) /* error */
|
if (orig_length < 0) /* error */
|
||||||
return 0;
|
return 0;
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
unsigned char *end = chars + charslen;
|
unsigned char *const end = chars + charslen;
|
||||||
unicode_val_T data;
|
unicode_val_T data;
|
||||||
|
|
||||||
if (part->document->buf_length) {
|
if (part->document->buf_length) {
|
||||||
@ -459,7 +474,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; chars < end; x++) {
|
while (chars < end) {
|
||||||
/* ELinks does not use NBSP_CHAR in UTF-8. */
|
/* ELinks does not use NBSP_CHAR in UTF-8. */
|
||||||
|
|
||||||
data = utf8_to_unicode(&chars, end);
|
data = utf8_to_unicode(&chars, end);
|
||||||
@ -473,7 +488,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
schar->attr = SCREEN_ATTR_FRAME;
|
schar->attr = SCREEN_ATTR_FRAME;
|
||||||
copy_screen_chars(&POS(x, y), schar, 1);
|
copy_screen_chars(&POS(x, y), schar, 1);
|
||||||
schar->attr = attr;
|
schar->attr = attr;
|
||||||
part->char_width[x] = 0;
|
part->char_width[x++] = 0;
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
unsigned char i;
|
unsigned char i;
|
||||||
@ -486,6 +501,8 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
good_char:
|
good_char:
|
||||||
|
if (data == UCS_SOFT_HYPHEN)
|
||||||
|
continue;
|
||||||
if (data == UCS_NO_BREAK_SPACE
|
if (data == UCS_NO_BREAK_SPACE
|
||||||
&& html_context->options->wrap_nbsp)
|
&& html_context->options->wrap_nbsp)
|
||||||
data = UCS_SPACE;
|
data = UCS_SPACE;
|
||||||
@ -502,8 +519,8 @@ good_char:
|
|||||||
schar->data = (unicode_val_T)data;
|
schar->data = (unicode_val_T)data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
copy_screen_chars(&POS(x, y), schar, 1);
|
copy_screen_chars(&POS(x++, y), schar, 1);
|
||||||
}
|
} /* while chars < end */
|
||||||
} else { /* not UTF-8 */
|
} else { /* not UTF-8 */
|
||||||
for (; charslen > 0; charslen--, x++, chars++) {
|
for (; charslen > 0; charslen--, x++, chars++) {
|
||||||
part->char_width[x] = 1;
|
part->char_width[x] = 1;
|
||||||
@ -535,13 +552,15 @@ good_char:
|
|||||||
len = x - x2;
|
len = x - x2;
|
||||||
} else { /* part->document == NULL */
|
} else { /* part->document == NULL */
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
unsigned char *end;
|
unsigned char *const end = chars + charslen;
|
||||||
|
|
||||||
for (end = chars + charslen; chars < end; x++) {
|
while (chars < end) {
|
||||||
unicode_val_T data;
|
unicode_val_T data;
|
||||||
|
|
||||||
part->spaces[x] = (*chars == ' ');
|
|
||||||
data = utf8_to_unicode(&chars, end);
|
data = utf8_to_unicode(&chars, end);
|
||||||
|
if (data == UCS_SOFT_HYPHEN)
|
||||||
|
continue;
|
||||||
|
part->spaces[x] = (data == UCS_SPACE);
|
||||||
part->char_width[x] = unicode_to_cell(data);
|
part->char_width[x] = unicode_to_cell(data);
|
||||||
if (part->char_width[x] == 2) {
|
if (part->char_width[x] == 2) {
|
||||||
x++;
|
x++;
|
||||||
@ -552,7 +571,8 @@ good_char:
|
|||||||
/* this is at the end only */
|
/* this is at the end only */
|
||||||
return x - x2;
|
return x - x2;
|
||||||
}
|
}
|
||||||
}
|
x++;
|
||||||
|
} /* while chars < end */
|
||||||
len = x - x2;
|
len = x - x2;
|
||||||
} else { /* not UTF-8 */
|
} else { /* not UTF-8 */
|
||||||
for (; charslen > 0; charslen--, x++, chars++) {
|
for (; charslen > 0; charslen--, x++, chars++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user