From 0c756fc3e84edb207d37b85019a6e2defb7afaa0 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Thu, 21 May 2009 17:22:12 +0300 Subject: [PATCH] TRE: Check for 32-bit wchar_t at configure time This check used to be in src/elinks.h. Move it to configure.in so that (1) the result can be logged and (2) ELinks won't even link with TRE if wchar_t prevents its use. Also, rename HAVE_TRE_REGEX_H to CONFIG_TRE, to reflect that it is not always defined if the header exists. --- configure.in | 85 ++++++++++++++++++++++++++++++++++------ src/config/options.inc | 2 +- src/elinks.h | 24 ------------ src/intl/charsets.h | 1 + src/viewer/text/search.c | 22 +++++------ 5 files changed, 85 insertions(+), 49 deletions(-) diff --git a/configure.in b/configure.in index d3815e08..46cbb9ad 100644 --- a/configure.in +++ b/configure.in @@ -925,11 +925,20 @@ fi # =================================================================== # Check for TRE library # =================================================================== +# +# This section only checks that --without-tre is not given and the +# library seems to work, and sets TRE_CFLAGS, TRE_LIBS, and +# tre_log. It does not define CONFIG_TRE, and always resets +# LIBS and CFLAGS back to their original values. +# +# After any --enable-utf-8 and --disable-utf-8 options have been +# handled, a separate section decides whether to actually use TRE. AC_ARG_WITH([[tre]], [[ --without-tre disable TRE regex search support]]) if test "$with_tre" = no; then AC_MSG_CHECKING([[for TRE]]) AC_MSG_RESULT([[disabled]]) + tre_log="no (explicitly disabled)" else AC_MSG_CHECKING([[for TRE in pkg-config]]) if pkg-config tre; then @@ -955,22 +964,12 @@ else regwcomp(&re, L"zap", REG_ICASE); regwexec(&re, L"ELIZAPROGRAM", 1, match, 0);], [AC_MSG_RESULT([[yes]]) - AC_DEFINE([HAVE_TRE_REGEX_H], [1], - [Define to 1 if you have the header file.]) - # TRE_CFLAGS will be used only where needed. - # TRE_LIBS will be kept in LIBS and used everywhere. 
- EL_RESTORE_FLAGS - LIBS="$TRE_LIBS $LIBS"], + tre_log="available"], [AC_MSG_RESULT([[no]]) - TRE_CFLAGS= - TRE_LIBS= - EL_RESTORE_FLAGS]) - AC_SUBST(TRE_CFLAGS) - AC_SUBST(TRE_LIBS) + tre_log="no (TRE not found)"]) + EL_RESTORE_FLAGS fi -AC_CHECK_SIZEOF([wchar_t], [4], [[#include ]]) - # =================================================================== # Check for Ruby, optional even if installed. # =================================================================== @@ -1385,6 +1384,66 @@ AC_ARG_ENABLE(weehoofooboomookerchoo, [AC_MSG_ERROR(Are you strange, or what?)]) +# =================================================================== +# Decide whether to use TRE +# =================================================================== +# +# This must be done after the CONFIG_UTF8 check above. +# The first part of the TRE check is separate, to get +# the configure --help output in a sensible order. + +if test "$tre_log" = "available"; then + if test "$CONFIG_UTF8" = "yes"; then + # When CONFIG_UTF8 and CONFIG_TRE are both defined, + # src/viewer/text/search.c makes a string of + # unicode_val_T and gives it to regwexec(), which + # expects a string of wchar_t. If the unicode_val_T + # and wchar_t types are too different, this won't + # work, so try to detect that and disable regexp + # operations entirely in that case. + # + # Currently, this code only compares the sizes of the + # types. src/intl/charsets.h defines unicode_val_T as + # uint32_t, so we check whether wchar_t has exactly 32 + # bits. But don't use AC_CHECK_SIZEOF for this, because + # there doesn't seem to be a documented way to get the + # result of that for use in the configure script. + # + # C99 says the implementation can define + # __STDC_ISO_10646__ if wchar_t values match ISO 10646 + # (or Unicode) numbers in all locales. Do not check + # that macro here, because it is too restrictive: it + # should be enough for ELinks if the values match in + # the locales where ELinks is actually run. 
+ AC_MSG_CHECKING([[whether wchar_t is exactly 32-bit]]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include <limits.h> +#include <stddef.h> +int dummy[(sizeof(wchar_t) * CHAR_BIT == 32) ? 1 : -1];]], [])], + [AC_MSG_RESULT([[yes]]) + tre_log="TRE"], + [AC_MSG_RESULT([[no]]) + tre_log="no (unsuitable wchar_t)"]) + else + # If UTF-8 is not used, then wchar_t doesn't matter. + tre_log="TRE" + fi +fi + +if test "$tre_log" = "TRE"; then + AC_DEFINE([CONFIG_TRE], [1], + [Define as 1 to use the TRE library for regular expression searching. This requires the <tre/regex.h> header file. If you define CONFIG_UTF8 too, then wchar_t must be exactly 32-bit so that it matches unicode_val_T.]) + # TRE_CFLAGS will be used only where needed. + LIBS="$LIBS $TRE_LIBS" +else + TRE_LIBS= + TRE_CFLAGS= +fi +AC_SUBST(TRE_CFLAGS) +AC_SUBST(TRE_LIBS) +EL_LOG_CONFIG([[CONFIG_TRE]], [[Regexp searching]], [[$tre_log]]) + + # =================================================================== # Further LDFLAGS tweaks # =================================================================== diff --git a/src/config/options.inc b/src/config/options.inc index b9c6a7f2..163f7b94 100644 --- a/src/config/options.inc +++ b/src/config/options.inc @@ -407,7 +407,7 @@ static struct option_info config_options_info[] = { N_("Whether the search should match the document text while " "maintaining case sensitivity.")), -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE INIT_OPT_INT("document.browse.search", N_("Regular expressions"), "regex", 0, 0, 2, 0, N_("Enable searching with regular expressions:\n" diff --git a/src/elinks.h b/src/elinks.h index 0e164a1b..6cdcffb0 100644 --- a/src/elinks.h +++ b/src/elinks.h @@ -29,30 +29,6 @@ #define DEBUG_MEMLEAK #endif - -/* When CONFIG_UTF8 is defined, src/viewer/text/search.c makes a string - * of unicode_val_T and gives it to regwexec(), which expects a string - * of wchar_t. 
If the unicode_val_T and wchar_t types are too different, - * this won't work, so try to detect that and disable regexp operations - * entirely in that case. - * - * Currently, this code only compares the sizes of the types. Because - * unicode_val_T is defined as uint32_t and POSIX says bytes are 8-bit, - * sizeof(unicode_val_T) is 4 and the following compares SIZEOF_WCHAR_T - * to that. - * - * C99 says the implementation can define __STDC_ISO_10646__ if wchar_t - * values match ISO 10646 (or Unicode) numbers in all locales. Do not - * check that macro here, because it is too restrictive: it should be - * enough for ELinks if the values match in the locales where ELinks is - * actually run. */ - -#ifdef CONFIG_UTF8 -#if SIZEOF_WCHAR_T != 4 -#undef HAVE_TRE_REGEX_H -#endif -#endif - /* This maybe overrides some of the standard high-level functions, to ensure * the expected behaviour. These overrides are not system specific. */ #include "osdep/stub.h" diff --git a/src/intl/charsets.h b/src/intl/charsets.h index 32a676fe..7baf3cf3 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -1,6 +1,7 @@ #ifndef EL__INTL_CHARSETS_H #define EL__INTL_CHARSETS_H +/* The TRE check in configure.in assumes unicode_val_T is uint32_t. */ typedef uint32_t unicode_val_T; /* U+0020 SPACE. Normally the same as ' ' or L' ' but perhaps ELinks diff --git a/src/viewer/text/search.c b/src/viewer/text/search.c index 83656dce..0cb673ae 100644 --- a/src/viewer/text/search.c +++ b/src/viewer/text/search.c @@ -18,7 +18,7 @@ #include <sys/types.h> /* FreeBSD needs this before regex.h */ #include <stdlib.h> #include <string.h> -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE #include <tre/regex.h> #endif @@ -270,7 +270,7 @@ get_range(struct document *document, int y, int height, int l, return 0; } -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE /** Returns a string @c doc that is a copy of the text in the search * nodes from @a s1 to (@a s1 + @a doclen - 1) with the space at the * end of each line converted to a new-line character (LF). 
*/ @@ -465,7 +465,7 @@ is_in_range_regex(struct document *document, int y, int height, return common_ctx.found; } -#endif /* HAVE_TRE_REGEX_H */ +#endif /* CONFIG_TRE */ static UCHAR * memacpy_u(unsigned char *text, int textlen, int utf8) @@ -602,7 +602,7 @@ is_in_range(struct document *document, int y, int height, if (get_range(document, y, height, textlen, &s1, &s2)) return 0; -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE if (get_opt_int("document.browse.search.regex")) return is_in_range_regex(document, y, height, text, textlen, min, max, s1, s2, utf8); @@ -681,7 +681,7 @@ srch_failed: *pl = len; } -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE struct get_searched_regex_context { int xoffset; int yoffset; @@ -749,7 +749,7 @@ get_searched_regex(struct document_view *doc_view, struct point **pt, int *pl, *pt = ctx.points; *pl = ctx.len; } -#endif /* HAVE_TRE_REGEX_H */ +#endif /* CONFIG_TRE */ static void get_searched(struct document_view *doc_view, struct point **pt, int *pl, int utf8) @@ -773,7 +773,7 @@ get_searched(struct document_view *doc_view, struct point **pt, int *pl, int utf return; } -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE if (get_opt_int("document.browse.search.regex")) get_searched_regex(doc_view, pt, pl, l, s1, s2, utf8); else @@ -1594,7 +1594,7 @@ search_typeahead(struct session *ses, struct document_view *doc_view, * a nice cleanup target ;-). 
--pasky */ enum search_option { -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE SEARCH_OPT_REGEX, #endif SEARCH_OPT_CASE, @@ -1602,7 +1602,7 @@ enum search_option { }; static struct option_resolver resolvers[] = { -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE { SEARCH_OPT_REGEX, "regex" }, #endif { SEARCH_OPT_CASE, "case" }, @@ -1669,7 +1669,7 @@ search_dlg_do(struct terminal *term, struct memory_list *ml, hop->values, SEARCH_OPTIONS); hop->data = data; -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE #define SEARCH_WIDGETS_COUNT 8 #else #define SEARCH_WIDGETS_COUNT 5 @@ -1693,7 +1693,7 @@ search_dlg_do(struct terminal *term, struct memory_list *ml, field = get_dialog_offset(dlg, SEARCH_WIDGETS_COUNT); add_dlg_field(dlg, text, 0, 0, NULL, MAX_STR_LEN, field, history); -#ifdef HAVE_TRE_REGEX_H +#ifdef CONFIG_TRE add_dlg_radio(dlg, _("Normal search", term), 1, 0, &hop->values[SEARCH_OPT_REGEX].number); add_dlg_radio(dlg, _("Regexp search", term), 1, 1, &hop->values[SEARCH_OPT_REGEX].number); add_dlg_radio(dlg, _("Extended regexp search", term), 1, 2, &hop->values[SEARCH_OPT_REGEX].number);