Fix document.html.wrap_nbsp in UTF-8 terminals.

!CONFIG_UTF8, ISO-8859-1 doc, ASCII terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, A0 ok
!CONFIG_UTF8, ISO-8859-1 doc, ISO-8859-1 terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, A0 ok
!CONFIG_UTF8, UTF-8 doc, ASCII terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, C2 A0 fail (drawn as "\001").
!CONFIG_UTF8, UTF-8 doc, ISO-8859-1 terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, C2 A0 fail (not wrapped).
CONFIG_UTF8, ISO-8859-1 doc, ASCII terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, A0 ok
CONFIG_UTF8, ISO-8859-1 doc, ISO-8859-1 terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, A0 ok
CONFIG_UTF8, ISO-8859-1 doc, UTF-8 terminal, UTF-8 I/O: all fail (not wrapped); after patch all ok.
CONFIG_UTF8, UTF-8 doc, ASCII terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, C2 A0 fail (drawn as "\001").
CONFIG_UTF8, UTF-8 doc, ISO-8859-1 terminal, UTF-8 or unibyte I/O:   ok,   ok,   ok, C2 A0 fail (not wrapped)
CONFIG_UTF8, UTF-8 doc, UTF-8 terminal, UTF-8 I/O: all fail (not wrapped); after patch all ok.
2025-06-30 22:19:29 -04:00 · 2007-01-30 10:21:12 +02:00 · 2007-01-30 10:21:12 +02:00 · ebf549ba77
commit ebf549ba77
parent 419857dce6
2 changed files with 13 additions and 7 deletions
--- a/src/document/html/renderer.c
+++ b/src/document/html/renderer.c
@@ -460,15 +460,14 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 			}

 			for (; chars < end; x++) {
-				if (*chars == NBSP_CHAR) {
-					schar->data = ' ';
-					part->spaces[x] = html_context->options->wrap_nbsp;
-					part->char_width[x] = 1;
-					chars++;
-				} else {
-					part->spaces[x] = (*chars == ' ');
+				/* ELinks does not use NBSP_CHAR in UTF-8.  */
+
+				/* The following is temporarily indented
+				 * just to make the diff easier to read.  */
+				{
 					data = utf8_to_unicode(&chars, end);
 					if (data == UCS_NO_CHAR) {
+						part->spaces[x] = 0;
 						if (charslen == 1) {
 							/* HR */
 							unsigned char attr = schar->attr;
@@ -490,6 +489,10 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 						}
 					} else {
 good_char:
+						if (data == UCS_NO_BREAK_SPACE
+						    && html_context->options->wrap_nbsp)
+							data = UCS_SPACE;
+						part->spaces[x] = (data == UCS_SPACE);
 						if (unicode_to_cell(data) == 2) {
 							schar->data = (unicode_val_T)data;
 							part->char_width[x] = 2;
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -7,6 +7,9 @@ typedef uint32_t unicode_val_T;
 * shouldn't rely on that.  */
 #define UCS_SPACE ((unicode_val_T) 0x0020)

+/* U+00A0 NO-BREAK SPACE.  */
+#define UCS_NO_BREAK_SPACE ((unicode_val_T) 0x00A0)
+
 /* U+FFFD REPLACEMENT CHARACTER.  Used when no Unicode mapping is
 * known for a byte in a codepage, or when invalid UTF-8 is received
 * from a terminal.  After generating the character, ELinks then