From d050cb67aa37390ab938b0a308c7541f19578506 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sun, 22 Oct 2006 00:05:37 +0300 Subject: [PATCH] Revert the use of wcwidth() and describe why. This reverts the following commits: - 86ed79deafd6944ba956dd639cb03432fa2ac45a Use wcwidth if available and applicable. - 304f5fa1ea4853e5b8f635e5fb53c13dddfff814 comment fix (__STDC_ISO_10646__, not __STDC_ISO_10646) - part of 71eebf1cc7a905eccc21fd3815f0fa41292bcc1c Compensate for glibc not defining wcwidth() when _XOPEN_SOURCE is not set And adds a lengthy comment about LC_CTYPE problems. --- configure.in | 4 ++-- src/intl/charsets.c | 35 ++++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/configure.in b/configure.in index e0980d36..1748e1dc 100644 --- a/configure.in +++ b/configure.in @@ -168,7 +168,7 @@ AC_HEADER_STDC AC_HEADER_SYS_WAIT AC_HEADER_TIME -AC_CHECK_HEADERS(wctype.h wchar.h) +AC_CHECK_HEADERS(wctype.h) AC_CHECK_HEADERS(fcntl.h limits.h time.h unistd.h) AC_CHECK_HEADERS(sigaction.h) AC_CHECK_HEADERS(arpa/inet.h) @@ -279,7 +279,7 @@ AC_FUNC_MMAP AC_FUNC_STRFTIME AC_CHECK_FUNCS(cfmakeraw gethostbyaddr herror strerror) AC_CHECK_FUNCS(popen uname access chmod alarm timegm mremap) -AC_CHECK_FUNCS(strcasecmp strncasecmp strcasestr strstr strchr strrchr wcwidth) +AC_CHECK_FUNCS(strcasecmp strncasecmp strcasestr strstr strchr strrchr) AC_CHECK_FUNCS(memmove bcopy stpcpy strdup index isdigit mempcpy memrchr) AC_CHECK_FUNCS(snprintf vsnprintf asprintf vasprintf) AC_CHECK_FUNCS(getifaddrs getpwnam inet_pton inet_ntop) diff --git a/src/intl/charsets.c b/src/intl/charsets.c index 6c296a38..9f788215 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -1,8 +1,5 @@ /* Charsets convertor */ -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE /* wcwidth() */ -#endif #ifndef _GNU_SOURCE #define _GNU_SOURCE /* strcasecmp() */ #endif @@ -17,9 +14,6 @@ #include #include -#if HAVE_WCHAR_H -#include -#endif #if HAVE_WCTYPE_H 
#include <wctype.h> #endif @@ -558,6 +552,30 @@ invalid_arg: * Find out number of standard terminal collumns needed for displaying symbol * (glyph) which represents Unicode character c. * + * TODO: Use wcwidth when it is available. This seems to require: + * - Make the configure script check whether <wchar.h> and wcwidth exist. + * - Define _XOPEN_SOURCE and include <wchar.h>. + * - Test that __STDC_ISO_10646__ is defined. (This macro means wchar_t + * matches ISO 10646 in all locales.) + * However, these do not suffice, because wcwidth depends on LC_CTYPE + * in glibc-2.3.6. For instance, wcwidth(0xff20) is -1 when LC_CTYPE + * is "fi_FI.ISO-8859-1" or "C", but 2 when LC_CTYPE is "fi_FI.UTF-8". + * <features.h> defines __STDC_ISO_10646__ as 200009L, so 0xff20 means + * U+FF20 FULLWIDTH COMMERCIAL AT regardless of LC_CTYPE; but this + * character is apparently not supported in all locales. Why is that? + * - Perhaps there is standardese that requires supported characters + * to be convertable to multibyte form. Then ELinks could just pick + * some UTF-8 locale for its wcwidth purposes. + * - Perhaps wcwidth can even return different nonnegative values for + * the same ISO 10646 character in different locales. Then ELinks + * would have to set LC_CTYPE to match at least the terminal's + * charset (which may differ from the LC_CTYPE environment variable, + * especially when the master process is serving a slave terminal). + * But there is no guarantee that the libc supports all the same + * charsets as ELinks does. + * For now, it seems safest to avoid the potentially locale-dependent + * libc version of wcwidth, and instead use a hardcoded mapping. + * * @return 2 for double-width glyph, 1 for others. * TODO: May be extended to return 0 for zero-width glyphs * (like composing, maybe unprintable too). 
@@ -565,10 +583,6 @@ invalid_arg: inline int unicode_to_cell(unicode_val_T c) { -#if __STDC_ISO_10646__ && HAVE_WCWIDTH - if (wcwidth(c) >= 2) - return 2; -#else /* !__STDC_ISO_10646__ || !HAVE_WCWIDTH */ if (c >= 0x1100 && (c <= 0x115f /* Hangul Jamo */ || c == 0x2329 @@ -584,7 +598,6 @@ unicode_to_cell(unicode_val_T c) || (c >= 0x20000 && c <= 0x2fffd) || (c >= 0x30000 && c <= 0x3fffd))) return 2; -#endif /* !__STDC_ISO_10646__ || !HAVE_WCWIDTH */ return 1; }