diff --git a/configure.in b/configure.in index e0980d36..1748e1dc 100644 --- a/configure.in +++ b/configure.in @@ -168,7 +168,7 @@ AC_HEADER_STDC AC_HEADER_SYS_WAIT AC_HEADER_TIME -AC_CHECK_HEADERS(wctype.h wchar.h) +AC_CHECK_HEADERS(wctype.h) AC_CHECK_HEADERS(fcntl.h limits.h time.h unistd.h) AC_CHECK_HEADERS(sigaction.h) AC_CHECK_HEADERS(arpa/inet.h) @@ -279,7 +279,7 @@ AC_FUNC_MMAP AC_FUNC_STRFTIME AC_CHECK_FUNCS(cfmakeraw gethostbyaddr herror strerror) AC_CHECK_FUNCS(popen uname access chmod alarm timegm mremap) -AC_CHECK_FUNCS(strcasecmp strncasecmp strcasestr strstr strchr strrchr wcwidth) +AC_CHECK_FUNCS(strcasecmp strncasecmp strcasestr strstr strchr strrchr) AC_CHECK_FUNCS(memmove bcopy stpcpy strdup index isdigit mempcpy memrchr) AC_CHECK_FUNCS(snprintf vsnprintf asprintf vasprintf) AC_CHECK_FUNCS(getifaddrs getpwnam inet_pton inet_ntop) diff --git a/src/bfu/inpfield.c b/src/bfu/inpfield.c index 297b84b5..7edd838c 100644 --- a/src/bfu/inpfield.c +++ b/src/bfu/inpfield.c @@ -680,12 +680,32 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) if (check_kbd_textinput_key(ev)) { unsigned char *text = widget_data->cdata; int textlen = strlen(text); -#ifdef CONFIG_UTF8 - const unsigned char *ins = encode_utf8(get_kbd_key(ev)); - int inslen = utf8charlen(ins); -#else /* !CONFIG_UTF8 */ +#ifndef CONFIG_UTF8 + /* Both get_kbd_key(ev) and @text + * are in the terminal's charset. */ const int inslen = 1; -#endif /* !CONFIG_UTF8 */ +#else /* CONFIG_UTF8 */ + const unsigned char *ins; + int inslen; + + if (term->utf8) { + /* get_kbd_key(ev) is in UCS-4, + * and @text is in UTF-8. */ + ins = encode_utf8(get_kbd_key(ev)); + /* get_kbd_key(ev) cannot be L'\0' + * because @check_kbd_textinput_key + * would have rejected it. So it + * is OK to use @strlen below. */ + } else { + /* get_kbd_key(ev) is UCS-4, and @text + * is in the terminal's charset. 
*/ + int cp = get_opt_codepage_tree(term->spec, + "charset"); + + ins = u2cp_no_nbsp(get_kbd_key(ev), cp); + } + inslen = strlen(ins); +#endif /* CONFIG_UTF8 */ if (textlen >= widget_data->widget->datalen - inslen) goto display_field; diff --git a/src/intl/charsets.c b/src/intl/charsets.c index 6c296a38..9f788215 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -1,8 +1,5 @@ /* Charsets convertor */ -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE /* wcwidth() */ -#endif #ifndef _GNU_SOURCE #define _GNU_SOURCE /* strcasecmp() */ #endif @@ -17,9 +14,6 @@ #include #include -#if HAVE_WCHAR_H -#include -#endif #if HAVE_WCTYPE_H #include #endif @@ -558,6 +552,30 @@ invalid_arg: * Find out number of standard terminal collumns needed for displaying symbol * (glyph) which represents Unicode character c. * + * TODO: Use wcwidth when it is available. This seems to require: + * - Make the configure script check whether <wchar.h> and wcwidth exist. + * - Define _XOPEN_SOURCE and include <wchar.h>. + * - Test that __STDC_ISO_10646__ is defined. (This macro means wchar_t + * matches ISO 10646 in all locales.) + * However, these do not suffice, because wcwidth depends on LC_CTYPE + * in glibc-2.3.6. For instance, wcwidth(0xff20) is -1 when LC_CTYPE + * is "fi_FI.ISO-8859-1" or "C", but 2 when LC_CTYPE is "fi_FI.UTF-8". + * <features.h> defines __STDC_ISO_10646__ as 200009L, so 0xff20 means + * U+FF20 FULLWIDTH COMMERCIAL AT regardless of LC_CTYPE; but this + * character is apparently not supported in all locales. Why is that? + * - Perhaps there is standardese that requires supported characters + * to be convertible to multibyte form. Then ELinks could just pick + * some UTF-8 locale for its wcwidth purposes. + * - Perhaps wcwidth can even return different nonnegative values for + * the same ISO 10646 character in different locales. 
Then ELinks + * would have to set LC_CTYPE to match at least the terminal's + * charset (which may differ from the LC_CTYPE environment variable, + * especially when the master process is serving a slave terminal). + * But there is no guarantee that the libc supports all the same + * charsets as ELinks does. + * For now, it seems safest to avoid the potentially locale-dependent + * libc version of wcwidth, and instead use a hardcoded mapping. + * * @return 2 for double-width glyph, 1 for others. * TODO: May be extended to return 0 for zero-width glyphs * (like composing, maybe unprintable too). @@ -565,10 +583,6 @@ invalid_arg: inline int unicode_to_cell(unicode_val_T c) { -#if __STDC_ISO_10646__ && HAVE_WCWIDTH - if (wcwidth(c) >= 2) - return 2; -#else /* !__STDC_ISO_10646__ || !HAVE_WCWIDTH */ if (c >= 0x1100 && (c <= 0x115f /* Hangul Jamo */ || c == 0x2329 @@ -584,7 +598,6 @@ unicode_to_cell(unicode_val_T c) || (c >= 0x20000 && c <= 0x2fffd) || (c >= 0x30000 && c <= 0x3fffd))) return 2; -#endif /* !__STDC_ISO_10646__ || !HAVE_WCWIDTH */ return 1; }