1
0
Fork 0

TRE: Check for 32-bit wchar_t at configure time

This check used to be in src/elinks.h.  Move it to configure.in so
that (1) the result can be logged and (2) ELinks won't even link with
TRE if wchar_t prevents its use.

Also, rename HAVE_TRE_REGEX_H to CONFIG_TRE, to reflect that it is not
always defined if the header exists.
This commit is contained in:
Kalle Olavi Niemitalo 2009-05-21 17:22:12 +03:00 committed by Kalle Olavi Niemitalo
parent 07c90ea438
commit 0c756fc3e8
5 changed files with 85 additions and 49 deletions

View File

@ -925,11 +925,20 @@ fi
# ===================================================================
# Check for TRE library
# ===================================================================
#
# This section only checks that --without-tre is not given and the
# library seems to work, and sets TRE_CFLAGS, TRE_LIBS, and
# tre_log. It does not define CONFIG_TRE, and always resets
# LIBS and CFLAGS back to their original values.
#
# After any --enable-utf-8 and --disable-utf-8 options have been
# handled, a separate section decides whether to actually use TRE.
AC_ARG_WITH([[tre]], [[ --without-tre disable TRE regex search support]])
if test "$with_tre" = no; then
AC_MSG_CHECKING([[for TRE]])
AC_MSG_RESULT([[disabled]])
tre_log="no (explicitly disabled)"
else
AC_MSG_CHECKING([[for TRE in pkg-config]])
if pkg-config tre; then
@ -955,22 +964,12 @@ else
regwcomp(&re, L"zap", REG_ICASE);
regwexec(&re, L"ELIZAPROGRAM", 1, match, 0);],
[AC_MSG_RESULT([[yes]])
AC_DEFINE([HAVE_TRE_REGEX_H], [1],
[Define to 1 if you have the <tre/regex.h> header file.])
# TRE_CFLAGS will be used only where needed.
# TRE_LIBS will be kept in LIBS and used everywhere.
EL_RESTORE_FLAGS
LIBS="$TRE_LIBS $LIBS"],
tre_log="available"],
[AC_MSG_RESULT([[no]])
TRE_CFLAGS=
TRE_LIBS=
EL_RESTORE_FLAGS])
AC_SUBST(TRE_CFLAGS)
AC_SUBST(TRE_LIBS)
tre_log="no (TRE not found)"])
EL_RESTORE_FLAGS
fi
AC_CHECK_SIZEOF([wchar_t], [4], [[#include <wchar.h>]])
# ===================================================================
# Check for Ruby, optional even if installed.
# ===================================================================
@ -1385,6 +1384,66 @@ AC_ARG_ENABLE(weehoofooboomookerchoo,
[AC_MSG_ERROR(Are you strange, or what?)])
# ===================================================================
# Decide whether to use TRE
# ===================================================================
#
# This must be done after the CONFIG_UTF8 check above.
# The first part of the TRE check is separate, to get
# the configure --help output in a sensible order.
# Refine the provisional result of the TRE library check.  Only the
# value "available" is examined here; any other tre_log value passes
# through unchanged.  On exit from this block, tre_log is "TRE" if the
# library may be used, or a "no ..." reason string otherwise.
if test "$tre_log" = "available"; then
if test "$CONFIG_UTF8" = "yes"; then
# When CONFIG_UTF8 and CONFIG_TRE are both defined,
# src/viewer/text/search.c makes a string of
# unicode_val_T and gives it to regwexec(), which
# expects a string of wchar_t. If the unicode_val_T
# and wchar_t types are too different, this won't
# work, so try to detect that and disable regexp
# operations entirely in that case.
#
# Currently, this code only compares the sizes of the
# types. src/intl/charsets.h defines unicode_val_T as
# uint32_t, so we check whether wchar_t has exactly 32
# bits. But don't use AC_CHECK_SIZEOF for this, because
# there doesn't seem to be a documented way to get the
# result of that for use in the configure script.
#
# C99 says the implementation can define
# __STDC_ISO_10646__ if wchar_t values match ISO 10646
# (or Unicode) numbers in all locales. Do not check
# that macro here, because it is too restrictive: it
# should be enough for ELinks if the values match in
# the locales where ELinks is actually run.
#
# The test program below declares an array whose size is 1
# when sizeof(wchar_t) * CHAR_BIT equals 32 and -1 otherwise,
# so it compiles only when wchar_t has exactly 32 bits.
# Nothing is linked or run for this check.
AC_MSG_CHECKING([[whether wchar_t is exactly 32-bit]])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <limits.h>
#include <stddef.h>
int dummy[(sizeof(wchar_t) * CHAR_BIT == 32) ? 1 : -1];]], [])],
[AC_MSG_RESULT([[yes]])
tre_log="TRE"],
[AC_MSG_RESULT([[no]])
tre_log="no (unsuitable wchar_t)"])
else
# If UTF-8 is not used, then wchar_t doesn't matter.
tre_log="TRE"
fi
fi
# Act on the final decision.  Only the exact value "TRE" enables the
# feature: CONFIG_TRE is defined and TRE_LIBS is appended to LIBS.
# For every other tre_log value, both TRE_LIBS and TRE_CFLAGS are
# cleared, so the values substituted below are empty.
if test "$tre_log" = "TRE"; then
AC_DEFINE([CONFIG_TRE], [1],
[Define as 1 to use the TRE library for regular expression searching. This requires the <tre/regex.h> header file. If you define CONFIG_UTF8 too, then wchar_t must be exactly 32-bit so that it matches unicode_val_T.])
# TRE_CFLAGS will be used only where needed.
LIBS="$LIBS $TRE_LIBS"
else
TRE_LIBS=
TRE_CFLAGS=
fi
# The substitutions happen on both paths, after the variables have
# reached their final values.
AC_SUBST(TRE_CFLAGS)
AC_SUBST(TRE_LIBS)
# Hand the final tre_log text to EL_LOG_CONFIG under the label
# "Regexp searching"; on the disabled paths it carries the reason.
EL_LOG_CONFIG([[CONFIG_TRE]], [[Regexp searching]], [[$tre_log]])
# ===================================================================
# Further LDFLAGS tweaks
# ===================================================================

View File

@ -407,7 +407,7 @@ static struct option_info config_options_info[] = {
N_("Whether the search should match the document text while "
"maintaining case sensitivity.")),
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
INIT_OPT_INT("document.browse.search", N_("Regular expressions"),
"regex", 0, 0, 2, 0,
N_("Enable searching with regular expressions:\n"

View File

@ -29,30 +29,6 @@
#define DEBUG_MEMLEAK
#endif
/* When CONFIG_UTF8 is defined, src/viewer/text/search.c makes a string
* of unicode_val_T and gives it to regwexec(), which expects a string
* of wchar_t. If the unicode_val_T and wchar_t types are too different,
* this won't work, so try to detect that and disable regexp operations
* entirely in that case.
*
* Currently, this code only compares the sizes of the types. Because
* unicode_val_T is defined as uint32_t and POSIX says bytes are 8-bit,
* sizeof(unicode_val_T) is 4 and the following compares SIZEOF_WCHAR_T
* to that.
*
* C99 says the implementation can define __STDC_ISO_10646__ if wchar_t
* values match ISO 10646 (or Unicode) numbers in all locales. Do not
* check that macro here, because it is too restrictive: it should be
* enough for ELinks if the values match in the locales where ELinks is
* actually run. */
#ifdef CONFIG_UTF8
#if SIZEOF_WCHAR_T != 4
#undef HAVE_TRE_REGEX_H
#endif
#endif
/* This maybe overrides some of the standard high-level functions, to ensure
* the expected behaviour. These overrides are not system specific. */
#include "osdep/stub.h"

View File

@ -1,6 +1,7 @@
#ifndef EL__INTL_CHARSETS_H
#define EL__INTL_CHARSETS_H
/* The TRE check in configure.in assumes unicode_val_T is uint32_t. */
typedef uint32_t unicode_val_T;
/* U+0020 SPACE. Normally the same as ' ' or L' ' but perhaps ELinks

View File

@ -18,7 +18,7 @@
#include <sys/types.h> /* FreeBSD needs this before regex.h */
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
#include <tre/regex.h>
#endif
@ -270,7 +270,7 @@ get_range(struct document *document, int y, int height, int l,
return 0;
}
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
/** Returns a string @c doc that is a copy of the text in the search
* nodes from @a s1 to (@a s1 + @a doclen - 1) with the space at the
* end of each line converted to a new-line character (LF). */
@ -465,7 +465,7 @@ is_in_range_regex(struct document *document, int y, int height,
return common_ctx.found;
}
#endif /* HAVE_TRE_REGEX_H */
#endif /* CONFIG_TRE */
static UCHAR *
memacpy_u(unsigned char *text, int textlen, int utf8)
@ -602,7 +602,7 @@ is_in_range(struct document *document, int y, int height,
if (get_range(document, y, height, textlen, &s1, &s2))
return 0;
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
if (get_opt_int("document.browse.search.regex"))
return is_in_range_regex(document, y, height, text, textlen,
min, max, s1, s2, utf8);
@ -681,7 +681,7 @@ srch_failed:
*pl = len;
}
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
struct get_searched_regex_context {
int xoffset;
int yoffset;
@ -749,7 +749,7 @@ get_searched_regex(struct document_view *doc_view, struct point **pt, int *pl,
*pt = ctx.points;
*pl = ctx.len;
}
#endif /* HAVE_TRE_REGEX_H */
#endif /* CONFIG_TRE */
static void
get_searched(struct document_view *doc_view, struct point **pt, int *pl, int utf8)
@ -773,7 +773,7 @@ get_searched(struct document_view *doc_view, struct point **pt, int *pl, int utf
return;
}
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
if (get_opt_int("document.browse.search.regex"))
get_searched_regex(doc_view, pt, pl, l, s1, s2, utf8);
else
@ -1594,7 +1594,7 @@ search_typeahead(struct session *ses, struct document_view *doc_view,
* a nice cleanup target ;-). --pasky */
enum search_option {
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
SEARCH_OPT_REGEX,
#endif
SEARCH_OPT_CASE,
@ -1602,7 +1602,7 @@ enum search_option {
};
static struct option_resolver resolvers[] = {
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
{ SEARCH_OPT_REGEX, "regex" },
#endif
{ SEARCH_OPT_CASE, "case" },
@ -1669,7 +1669,7 @@ search_dlg_do(struct terminal *term, struct memory_list *ml,
hop->values, SEARCH_OPTIONS);
hop->data = data;
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
#define SEARCH_WIDGETS_COUNT 8
#else
#define SEARCH_WIDGETS_COUNT 5
@ -1693,7 +1693,7 @@ search_dlg_do(struct terminal *term, struct memory_list *ml,
field = get_dialog_offset(dlg, SEARCH_WIDGETS_COUNT);
add_dlg_field(dlg, text, 0, 0, NULL, MAX_STR_LEN, field, history);
#ifdef HAVE_TRE_REGEX_H
#ifdef CONFIG_TRE
add_dlg_radio(dlg, _("Normal search", term), 1, 0, &hop->values[SEARCH_OPT_REGEX].number);
add_dlg_radio(dlg, _("Regexp search", term), 1, 1, &hop->values[SEARCH_OPT_REGEX].number);
add_dlg_radio(dlg, _("Extended regexp search", term), 1, 2, &hop->values[SEARCH_OPT_REGEX].number);