diff --git a/src/intl/charsets.c b/src/intl/charsets.c index b51a6189..c8702927 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -183,7 +183,8 @@ u2cp_(unicode_val_T u, int to, enum nbsp_mode nbsp_mode) return encode_utf8(u); #endif /* CONFIG_UTF8 */ - /* To mark non breaking spaces, we use a special char NBSP_CHAR. */ + /* To mark non breaking spaces in non-UTF-8 strings, we use a + * special char NBSP_CHAR. */ if (u == 0xa0) { if (nbsp_mode == NBSP_MODE_HACK) return NBSP_CHAR_STRING; else /* NBSP_MODE_ASCII */ return " "; diff --git a/src/intl/charsets.h b/src/intl/charsets.h index 69fb3cb9..66768b7e 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -31,7 +31,8 @@ typedef uint32_t unicode_val_T; * We should fix that if we ever change the value. */ #define UCS_ORPHAN_CELL ((unicode_val_T) 0x20) -/* &nbsp; replacement character. See u2cp(). */ +/* &nbsp; replacement character. See u2cp(). + * UTF-8 strings should use the encoding of U+00A0 instead. */ #define NBSP_CHAR ((unsigned char) 1) #define NBSP_CHAR_STRING "\001" @@ -64,7 +65,8 @@ enum convert_string_mode { CSM_NONE, /* Convert nothing. */ }; -/* How to translate non-breaking spaces. */ +/* How to translate U+00A0 NO-BREAK SPACE. If u2cp_ is converting to + * UTF-8, it ignores this choice and just encodes the U+00A0. */ enum nbsp_mode { /* Convert to NBSP_CHAR. This lets the HTML renderer * recognize nbsp even if the codepage doesn't support