Add mcpp from github.com/ned14/mcpp

This commit is contained in:
Benau 2020-01-03 12:46:35 +08:00
parent 40af912312
commit db5d1420f9
17 changed files with 17626 additions and 0 deletions

View File

@ -239,6 +239,19 @@ if(NOT SERVER_ONLY)
include_directories("${PROJECT_SOURCE_DIR}/lib/graphics_utils") include_directories("${PROJECT_SOURCE_DIR}/lib/graphics_utils")
endif() endif()
# Libmcpp: use an installed copy when both the library and its public header
# (mcpp_lib.h) are found, otherwise build the copy bundled in lib/mcpp.
find_library(MCPP_LIBRARY NAMES mcpp libmcpp)
find_path(MCPP_INCLUDEDIR NAMES mcpp_lib.h)
if(MCPP_LIBRARY AND MCPP_INCLUDEDIR)
    include_directories("${MCPP_INCLUDEDIR}")
    message(STATUS "Use system libmcpp: ${MCPP_LIBRARY}")
else()
    add_subdirectory("${PROJECT_SOURCE_DIR}/lib/mcpp")
    include_directories("${PROJECT_SOURCE_DIR}/lib/mcpp")
    # Point MCPP_LIBRARY at the target created by the bundled build.
    set(MCPP_LIBRARY mcpp)
    message(STATUS "System libmcpp not found, use the bundled one.")
endif()
if(NOT SERVER_ONLY) if(NOT SERVER_ONLY)
if(USE_SYSTEM_SQUISH) if(USE_SYSTEM_SQUISH)
find_library(SQUISH_LIBRARY NAMES squish libsquish) find_library(SQUISH_LIBRARY NAMES squish libsquish)
@ -578,6 +591,7 @@ target_link_libraries(supertuxkart
stkirrlicht stkirrlicht
${Angelscript_LIBRARIES} ${Angelscript_LIBRARIES}
${CURL_LIBRARIES} ${CURL_LIBRARIES}
# Link whichever libmcpp was selected by the detection step: the full path
# of the system library, or the bundled `mcpp` target.
${MCPP_LIBRARY}
) )
if (USE_SQLITE3) if (USE_SQLITE3)

16
lib/mcpp/CMakeLists.txt Normal file
View File

@ -0,0 +1,16 @@
# Build script for the bundled mcpp sources, compiled as the static library
# target `mcpp`.
#
# The `...3.10` upper bound keeps 2.6 as the minimum for old CMake releases
# (which ignore the suffix) while requesting a modern policy version, so that
# CMake releases which removed pre-3.5 compatibility still accept this file.
cmake_minimum_required(VERSION 2.6...3.10)

# main_libmcpp.c is deliberately not part of the library: it is only a sample
# program that defines main(), which does not belong in a library that gets
# linked into another executable.
add_library(mcpp STATIC
    directive.c
    eval.c
    expand.c
    main.c
    mbchar.c
    support.c
    system.c
)

# MCPP_LIB selects the library ("subroutine") build of the sources;
# HAVE_CONFIG_H is assumed to make them read the bundled config.h.
set_property(TARGET mcpp APPEND PROPERTY
    COMPILE_DEFINITIONS MCPP_LIB HAVE_CONFIG_H)

# Always optimise the bundled library on GCC-style toolchains. This is a
# compiler option, so it is set as a compile flag rather than a definition.
if(UNIX OR MINGW)
    set_target_properties(mcpp PROPERTIES COMPILE_FLAGS "-O3")
endif()

89
lib/mcpp/config.h Normal file
View File

@ -0,0 +1,89 @@
// **********************************************************************
//
// Copyright (c) 2015 ZeroC, Inc. All rights reserved.
//
// **********************************************************************
// Simplified and reduced version of config.h, with support for Windows,
// OS X and Linux.
/*
 * Target compiler selection: INDEPENDENT requests the compiler-independent
 * build, i.e. mcpp is not tied to one particular target compiler.
 */
#define COMPILER INDEPENDENT
// Windows support for MSC and MINGW
/*
 * Every toolchain that predefines _WIN32 takes this branch and is described
 * as MSC on SYS_WIN. Unlike the two branches below, no HAVE_* feature macro,
 * FNAME_FOLD or LL_FORM is defined here.
 */
#if defined(_WIN32)
#define HOST_COMPILER MSC
#define HOST_SYSTEM SYS_WIN
#define SYSTEM SYS_WIN
#define OBJEXT "obj"
#elif defined(__APPLE__)
/* Define if the cases of file name are folded. */
#define FNAME_FOLD 1
/* Define to 1 if the system has the type `intmax_t'. */
#define HAVE_INTMAX_T 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the `stpcpy' function. */
#define HAVE_STPCPY 1
/* Define the host compiler. */
#define HOST_COMPILER GNUC
/* Define the host system. */
#define HOST_SYSTEM SYS_MAC
/* Define printf length modifier for the longest integer. */
#define LL_FORM "j"
/* Define the suffix of object file. */
#define OBJEXT "o"
/* Define the target system. */
#define SYSTEM SYS_MAC
#else
// Linux.
/*
 * This branch is also taken by every other platform that predefines neither
 * _WIN32 nor __APPLE__; all of them are configured as SYS_LINUX with GNUC.
 */
/* Define to 1 if the system has the type `intmax_t'. */
#define HAVE_INTMAX_T 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the `stpcpy' function. */
#define HAVE_STPCPY 1
/* Define the host compiler. */
#define HOST_COMPILER GNUC
/* Define the host system. */
#define HOST_SYSTEM SYS_LINUX
/* Define printf length modifier for the longest integer. */
#define LL_FORM "j"
/* Define the suffix of object file. */
#define OBJEXT "o"
/* Define the target system. */
#define SYSTEM SYS_LINUX
#endif

382
lib/mcpp/configed.H Normal file
View File

@ -0,0 +1,382 @@
/*
* configed.H
* Configurations for MCPP using config.h generated by configure script.
*
* WARNING: These settings assume HOST == TARGET. In case of HOST
* differs from TARGET, you must edit this file here and there.
*/
/* Boolean values used by the configuration macros of this header. */
#define TRUE 1
#define FALSE 0
#define DATE "2008/11" /* Date of mcpp */
/*
* 'Target' means the O.S. and the compiler to which cpp is implemented.
* 'Host' means the O.S. and the compiler with which cpp is compiled.
*/
/*
 * config.h supplies COMPILER, HOST_COMPILER, SYSTEM and HOST_SYSTEM. The
 * symbolic values it uses (INDEPENDENT, MSC, GNUC, SYS_*) are defined further
 * down in this header, which works because macros are expanded at the point
 * of use rather than at the point of definition.
 */
#include "config.h"
/*
 * Fallbacks for a config.h that leaves the compilers unspecified. The
 * config.h bundled with this copy defines both macros on every branch, so
 * these defaults are not expected to take effect with it.
 */
#ifndef COMPILER /* No target compiler specified */
#define COMPILER COMPILER_UNKNOWN
#endif
#ifndef HOST_COMPILER /* No host compiler specified */
#define HOST_COMPILER COMPILER
#endif
/*
* P A R T 1 Configurations for target-operating-system
* and target-compiler.
*/
/*
* Names of the SYSTEM (i.e. target operating system). This is needed so that
* cpp can use appropriate filename conventions.
*/
/*
 * System codes. The top hex digit identifies the family: 0x1000 for the
 * UNIX-like systems and 0x7000 for the Windows ones.
 */
#define SYS_UNKNOWN 0
#define SYS_UNIX 0x1000
#define SYS_LINUX 0x1800 /* (SYS_LINUX & 0xF000) == SYS_UNIX */
#define SYS_FREEBSD 0x1A00 /* (SYS_FREEBSD & 0xF000) == SYS_UNIX */
#define SYS_CYGWIN 0x1C00 /* (SYS_CYGWIN & 0xF000) == SYS_UNIX */
#define SYS_MAC 0x1E00 /* (SYS_MAC & 0xF000) == SYS_UNIX */
#define SYS_WIN 0x7000
#define SYS_WIN32 0x7400 /* (SYS_WIN32 & 0xF000) == SYS_WIN */
#define SYS_MINGW 0x7C00 /* (SYS_MINGW & 0xF000) == SYS_WIN */
/* COMPILER */
#define COMPILER_UNKNOWN 0
#define MSC 0x7400 /* Microsoft C, Visual C++ */
#define BORLANDC 0x7440 /* Borland C */
#define WIN_SYMANTECC 0x7470 /* Symantec for Windows */
#define LCC 0x74C0 /* LCC-Win32 */
#define GNUC 0x00E0 /* GNU C (GCC) */
#define INDEPENDENT 0xFFFF /* No target, compiler-independent-build*/
/*
 * Family extraction. SYS_FAMILY and HOST_SYS_FAMILY keep the top hex digit of
 * the system code; COMPILER_FAMILY keeps bits 4-7 of the compiler code
 * (0x00 for MSC, 0x40 BORLANDC, 0x70 WIN_SYMANTECC, 0xC0 LCC, 0xE0 GNUC,
 * 0xF0 INDEPENDENT). SYSTEM, HOST_SYSTEM and COMPILER are read when these
 * macros are used, not here.
 */
#define SYS_FAMILY (SYSTEM & 0xF000)
#define COMPILER_FAMILY (COMPILER & 0xF0)
#define HOST_SYS_FAMILY (HOST_SYSTEM & 0xF000)
/* Default MBCHAR (multi-byte character) encoding. */
#define EUC_JP 0x10 /* Extended UNIX code of JIS X 0208 */
#define GB2312 0x20 /* EUC-like encoding of Chinese GB 2312-80 */
#define KSC5601 0x30 /* EUC-like encoding of Korean KS C 5601 */
#define SJIS 0x80 /* Shift-JIS encoding of JIS X 0208 */
#define BIGFIVE 0x90 /* Encoding of Taiwanese Big Five */
#define ISO2022_JP 0x100 /* ISO-2022-JP (ISO-2022-JP1) encoding */
#define UTF8 0x1000 /* UTF-8 encoding */
/*
* MBCHAR means multi-byte character encoding.
* MBCHAR means the default encoding, and you can change the encoding by
* #pragma MCPP setlocale, -e <encoding> option or environment variable
* LC_ALL, LC_CTYPE, LANG.
* MBCHAR == 0 means not to recognize any multi-byte character encoding.
*/
/*
* In order to predefine target-dependent macros,
* several macros are defined here:
* *_OLD define the macro beginning with an alphabetic letter,
* *_STD, *_STD?, *_EXT, *_EXT2 define the macro beginning with an '_'.
* *_STD1 define the macro beginning with '__' and ending with an alpha-
* numeric letter.
* *_STD2 define the macro beginning with '__' and ending with '__'.
* These may not be defined, if they are not needed.
* They should not be #defined to no token or to "".
*
* SYSTEM_OLD, SYSTEM_STD1, SYSTEM_STD2, SYSTEM_EXT, SYSTEM_EXT2
* define the target operating system (by name).
* SYSTEM_SP_OLD, SYSTEM_SP_STD define the target-OS specific macro name
* COMPILER_OLD, COMPILER_STD1, COMPILER_STD2, COMPILER_EXT, COMPILER_EXT2
* , COMPILER_SP_OLD, COMPILER_SP_STD
* define the target compiler (by name).
* COMPILER_CPLUS defines the target C++ compiler.
* COMPILER_SP1, COMPILER_SP2, COMPILER_SP3
* define the compiler-specific macros.
*
* <macro>_VAL specify the value of the <macro>.
* If not specified, these values default to "1".
* To define the value of no-token, specify as "" rather than no-token.
* SYSTEM_OLD, SYSTEM_STD?, COMPILER_OLD have the value of "1".
*/
/*
* target-compiler-dependent definitions:
*
* LINE_PREFIX defines the output line prefix, if not "#line 123".
* This should be defined as "# " to represent "# 123" format
* ("#line " represents "#line 123" format).
*
* ENV_C_INCLUDE_DIR may be defined to the name of environment-variable for
* C include directory.
* ENV_CPLUS_INCLUDE_DIR is name of environment-variable for C++ include
* directory which exists other than ENV_C_INCLUDE_DIR.
* ENV_SEP is the separator (other than space) of include-paths in an
* environment-variable. e.g. the ':' in
* "/usr/abc/include:/usr/xyz/include"
*
* EMFILE should be defined to the macro to represent errno of 'too many
* open files' if the macro is different from EMFILE.
*
* ONE_PASS should be set TRUE, if COMPILER is "one pass compiler".
*
* FNAME_FOLD means that target-system folds upper and lower cases of
* directory and file-name.
*
* SEARCH_INIT specifies the default value of 'search_rule' (in system.c).
* 'search_rule' holds searching rule of #include "header.h" to
* search first before searching user specified or system-
* specific include directories.
* CURRENT means to search the directory relative to "current
* directory" which is current at cpp invocation.
* SOURCE means to search the directory relative to that of the
* source file (i.e. "includer").
* (CURRENT & SOURCE) means to search current directory first
* source directory next.
* 'search_rule' is initialized to SEARCH_INIT.
*/
/* Bit values for 'search_rule' / SEARCH_INIT. */
#define CURRENT 1
#define SOURCE 2
/* Names predefined for every system of the UNIX family. */
#if SYS_FAMILY == SYS_UNIX
#define SYSTEM_OLD "unix"
#define SYSTEM_STD1 "__unix"
#define SYSTEM_STD2 "__unix__"
#endif
/*
 * System-specific names. The config.h bundled with this copy only ever sets
 * SYSTEM to SYS_WIN, SYS_MAC or SYS_LINUX, so with it the FreeBSD, Cygwin and
 * MinGW sections below stay inactive.
 */
#if SYSTEM == SYS_FREEBSD
#define SYSTEM_EXT "__FreeBSD__"
#endif
#if SYSTEM == SYS_LINUX
#define SYSTEM_EXT "__linux__"
#endif
#if SYSTEM == SYS_MAC
#define SYSTEM_EXT "__APPLE__"
#endif
/*
 * SYS_CYGWIN belongs to the UNIX family, so its SJIS default has to be set
 * here, before the generic UNIX default (EUC_JP) further down.
 */
#if SYSTEM == SYS_CYGWIN
#define SYSTEM_EXT "__CYGWIN__"
#if defined (__CYGWIN64__)
#define SYSTEM_EXT2 "__CYGWIN64__"
#else
#define SYSTEM_EXT2 "__CYGWIN32__"
#endif
#ifndef MBCHAR
#define MBCHAR SJIS
#endif
#endif
#if SYSTEM == SYS_MINGW
#define SYSTEM_EXT "__MINGW__"
#if defined (__MINGW64__)
#define SYSTEM_EXT2 "__MINGW64__"
#else
#define SYSTEM_EXT2 "__MINGW32__"
#endif
#ifndef MBCHAR
#define MBCHAR SJIS
#endif
#endif
/* Default encoding for the remaining UNIX-family systems. */
#if SYS_FAMILY == SYS_UNIX
#ifndef MBCHAR
#define MBCHAR EUC_JP /* UTF8 if you like */
#endif
#endif
/*
 * Settings for a GCC target. The config.h bundled with this copy defines
 * COMPILER as INDEPENDENT, so with it this section is skipped.
 * NOTE(review): GCC_MAJOR_VERSION / GCC_MINOR_VERSION are not defined in this
 * header; they would have to come from elsewhere if this section is enabled.
 */
#if COMPILER == GNUC
#define COMPILER_EXT "__GNUC__"
#define COMPILER_EXT_VAL GCC_MAJOR_VERSION
#define COMPILER_EXT2 "__GNUC_MINOR__"
#define COMPILER_EXT2_VAL GCC_MINOR_VERSION
#define COMPILER_CPLUS "__GNUG__"
#define COMPILER_CPLUS_VAL GCC_MAJOR_VERSION
#ifndef ENV_C_INCLUDE_DIR
#define ENV_C_INCLUDE_DIR "C_INCLUDE_PATH"
#define ENV_CPLUS_INCLUDE_DIR "CPLUS_INCLUDE_PATH"
#endif
/*
* __SIZE_TYPE__, __PTRDIFF_TYPE__ and __WCHAR_TYPE__ are the predefines of
* GCC and undocumented in GCC 2.
* On GCC V.3.*, V.4.*, these macros are known by mcpp_g*_predef_*.h files.
*/
#if __GNUC__ == 2
#define COMPILER_SP1 "__SIZE_TYPE__"
#define COMPILER_SP2 "__PTRDIFF_TYPE__"
#define COMPILER_SP3 "__WCHAR_TYPE__"
#endif
#define CMP_NAME "GCC"
#endif /* COMPILER == GNUC */
/* Settings used when mcpp is built without a specific target compiler. */
#if COMPILER == INDEPENDENT
/* specifications of compiler-independent-build */
#define LINE_PREFIX "#line "
#define STD_LINE_PREFIX TRUE /* Output #line by C source format */
#define HAVE_DIGRAPHS TRUE /* Output digraphs as it is */
#define SEARCH_INIT SOURCE /* Include directory relative to source */
#define SJIS_IS_ESCAPE_FREE TRUE /* Do not treat SJIS specially */
#define BIGFIVE_IS_ESCAPE_FREE TRUE /* Do not treat specially */
#define ISO2022_JP_IS_ESCAPE_FREE TRUE /* Do not treat specially */
#define TARGET_HAVE_LONG_LONG TRUE /* dummy */
#define STDC_VERSION 199409L /* Initial value of __STDC_VERSION__ */
#endif
/*
 * Fallback values.
 *
 * Each predefined-macro name that was configured without an explicit
 * <macro>_VAL gets the value "1".
 */
#if defined(SYSTEM_EXT) && !defined(SYSTEM_EXT_VAL)
#define SYSTEM_EXT_VAL "1"
#endif
#if defined(SYSTEM_EXT2) && !defined(SYSTEM_EXT2_VAL)
#define SYSTEM_EXT2_VAL "1"
#endif
#if defined(COMPILER_STD1) && !defined(COMPILER_STD1_VAL)
#define COMPILER_STD1_VAL "1"
#endif
#if defined(COMPILER_STD2) && !defined(COMPILER_STD2_VAL)
#define COMPILER_STD2_VAL "1"
#endif
#if defined(COMPILER_EXT) && !defined(COMPILER_EXT_VAL)
#define COMPILER_EXT_VAL "1"
#endif
#if defined(COMPILER_EXT2) && !defined(COMPILER_EXT2_VAL)
#define COMPILER_EXT2_VAL "1"
#endif
#if defined(COMPILER_CPLUS) && !defined(COMPILER_CPLUS_VAL)
#define COMPILER_CPLUS_VAL "1"
#endif
/* Names of the environment variables holding the include directories. */
#if !defined(ENV_C_INCLUDE_DIR)
#define ENV_C_INCLUDE_DIR "INCLUDE"
#endif
#if !defined(ENV_CPLUS_INCLUDE_DIR)
#define ENV_CPLUS_INCLUDE_DIR "CPLUS_INCLUDE"
#endif
/* Separator between the paths listed in those environment variables. */
#if !defined(ENV_SEP) && (SYS_FAMILY == SYS_WIN)
#define ENV_SEP ';'
#elif !defined(ENV_SEP)
#define ENV_SEP ':'
#endif
/* The target compiler is treated as multi-pass unless stated otherwise. */
#if !defined(ONE_PASS)
#define ONE_PASS FALSE
#endif
/*
* CHARBIT, UCHARMAX are respectively CHAR_BIT, UCHAR_MAX of target compiler.
* CHARBIT should be defined to the number of bits per character.
* It is needed only for processing of multi-byte character constants.
* UCHARMAX should be defined to the maximum value of type unsigned char
* or maximum value of unsigned int which is converted from type (signed)
* char.
*
* LONGMAX should be defined to the LONG_MAX in <limits.h>.
* ULONGMAX should be defined to the ULONG_MAX in <limits.h>.
*/
/* _POSIX_* only to get PATH_MAX */
/*
 * The feature-test macros are defined only around the inclusion of limits.h
 * and removed again afterwards. _POSIX_C_SOURCE_defined records that
 * _POSIX_C_SOURCE was defined here, so a definition that already existed
 * before this point is left in place.
 */
#define _POSIX_ 1
#define _POSIX_SOURCE 1
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 1
#define _POSIX_C_SOURCE_defined 1
#endif
#include "limits.h"
#undef _POSIX_
#undef _POSIX_SOURCE
#ifdef _POSIX_C_SOURCE_defined
#undef _POSIX_C_SOURCE
#undef _POSIX_C_SOURCE_defined
#endif
/* mcpp's own names for the limits.h values (this setup assumes host == target). */
#define CHARBIT CHAR_BIT
#define UCHARMAX UCHAR_MAX
#define USHRTMAX USHRT_MAX
#define LONGMAX LONG_MAX
#define ULONGMAX ULONG_MAX
/*
 * MBCHAR is the default multi-byte character encoding (SJIS, EUC_JP or one
 * of the other encoding codes). When nothing selected one earlier it is 0.
 */
#if !defined(MBCHAR)
#define MBCHAR 0
#endif
/*
 * SJIS_IS_ESCAPE_FREE means the compiler does not escape 0x5c ('\\') inside
 * a shift-JIS encoded multi-byte character. With SJIS_IS_ESCAPE_FREE == FALSE
 * cpp inserts a '\\' before a '\\' that is the 2nd byte of an SJIS code in a
 * literal; the insertion is for a compiler-proper that cannot recognize SJIS
 * literals.
 * BIGFIVE_IS_ESCAPE_FREE and ISO2022_JP_IS_ESCAPE_FREE express the same for
 * the BIGFIVE and ISO2022_JP encodings.
 * All three are FALSE unless set earlier; use TRUE if your compiler needs it.
 */
#if !defined(SJIS_IS_ESCAPE_FREE)
#define SJIS_IS_ESCAPE_FREE FALSE
#endif
#if !defined(BIGFIVE_IS_ESCAPE_FREE)
#define BIGFIVE_IS_ESCAPE_FREE FALSE
#endif
#if !defined(ISO2022_JP_IS_ESCAPE_FREE)
#define ISO2022_JP_IS_ESCAPE_FREE FALSE
#endif
/*
* P A R T 2 Configurations for host-compiler.
*
* WARNING: In case of HOST_COMPILER differs from COMPILER, you must
* edit here and there of this part.
*/
/*
 * HOST_HAVE_STPCPY is only meant for use in #if expressions. Where config.h
 * leaves HAVE_STPCPY undefined (its _WIN32 branch), the expansion is an
 * undefined identifier, which the preprocessor evaluates as 0.
 */
#define HOST_HAVE_STPCPY HAVE_STPCPY
/*
* Declaration of standard library functions and macros.
*/
/* stdin, stdout, stderr, FILE, NULL, fgets(), fputs() and other functions. */
#include "stdio.h"
/* PATHMAX is the maximum length of path-list on the host system. */
/* PATH_MAX is used when limits.h (included earlier) provided it. */
#ifdef PATH_MAX
#define PATHMAX PATH_MAX /* Posix macro */
#else
#define PATHMAX FILENAME_MAX
#endif
/* islower(), isupper(), toupper(), isdigit(), isxdigit(), iscntrl() */
#include "ctype.h"
/* errno */
#include "errno.h"
#include "string.h"
#include "stdlib.h"
#include "time.h"
#include "setjmp.h"
/* For debugging malloc systems by kmatsui */
/* Pulled in only when both KMMALLOC and _MEM_DEBUG are non-zero. */
#if KMMALLOC && _MEM_DEBUG
#include "xalloc.h"
#endif

1699
lib/mcpp/directive.c Normal file

File diff suppressed because it is too large Load Diff

1673
lib/mcpp/eval.c Normal file

File diff suppressed because it is too large Load Diff

2980
lib/mcpp/expand.c Normal file

File diff suppressed because it is too large Load Diff

564
lib/mcpp/internal.H Normal file
View File

@ -0,0 +1,564 @@
/*-
* Copyright (c) 1998, 2002-2008 Kiyoshi Matsui <kmatsui@t3.rim.or.jp>
* All rights reserved.
*
* Some parts of this code are derived from the public domain software
* DECUS cpp (1984,1985) written by Martin Minow.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* I N T E R N A L . H
* I n t e r n a l D e f i n i t i o n s f o r M C P P
*
* In general, definitions in this file should not be changed by implementor.
*/
/* Refuse to compile unless system.H (which defines SYSTEM_H) came first. */
#ifndef SYSTEM_H
#error "system.H" must be included prior to "internal.H"
#endif
#include "mcpp_out.h"
/*
 * MCPP_LIB is tested by value: it has to be defined to a non-zero value
 * (e.g. by -DMCPP_LIB) to enable the library build; undefined counts as 0.
 */
#if MCPP_LIB
#include "mcpp_lib.h" /* External interface when used as library */
#endif
#define EOS '\0' /* End of string */
#define CHAR_EOF 0 /* Returned by get_ch() on eof */
/*
 * Special argument-count codes. With UCHARMAX == 255 the values are
 * VA_ARGS 256, GVA_ARGS 512 and AVA_ARGS 768 (both bits), and the DEF_*
 * codes run from -769 (DEF_PRAGMA) down to -774 (DEF_NOARGS_DYNAMIC).
 * NOTE(review): presumably these are stored in DEFBUF.nargs - confirm.
 */
#define VA_ARGS (UCHARMAX + 1) /* Signal of variable arguments */
#define GVA_ARGS (VA_ARGS * 2) /* GCC-specific variable args */
#define AVA_ARGS (VA_ARGS | GVA_ARGS)/* Any variable arguments */
#define DEF_PRAGMA (-1 - AVA_ARGS) /* _Pragma() pseudo-macro */
#define DEF_NOARGS (-2 - AVA_ARGS) /* #define foo vs #define foo() */
#define DEF_NOARGS_PREDEF_OLD (DEF_NOARGS - 1)
/* Compiler-predefined macro without leading '_' */
#define DEF_NOARGS_PREDEF (DEF_NOARGS - 2)/* Compiler-predefined macro */
#define DEF_NOARGS_STANDARD (DEF_NOARGS - 3)/* Standard predefined macro */
#define DEF_NOARGS_DYNAMIC (DEF_NOARGS - 4)/* Standard dynamically defined */
/*
* These magic characters must be control characters which can't be used
* in source file.
*/
/* for '#pragma MCPP debug macro_call' and -K option in STD mode. */
#define MAC_INF 0x18 /* Magic for macro informations */
/* In STD and POST_STD modes (IN_SRC and TOK_SEP are for STD mode only). */
#define DEF_MAGIC 0x19 /* Magic to prevent recursive expansion */
#define IN_SRC 0x1A /* Magic of name from source */
#define RT_END 0x1C /* Magic of macro rescan boundary */
#define ST_QUOTE 0x1D /* Magic for stringizing */
#define CAT 0x1E /* Token concatenation delim. */
#define TOK_SEP 0x1F /* Magic to wrap expanded macro */
/* In OLD_PREP mode. */
/* COM_SEP shares the value 0x1F with TOK_SEP, which is for STD mode only. */
#define COM_SEP 0x1F /* Comment of 0-length */
#define MAC_PARM 0x7F /* Macro parameter signal */
/* Special character types */
/* Each type is a distinct bit, so several types can be combined in a mask. */
#define LET 1 /* Letter (alphabet and _) */
#define DIG 2 /* Digit */
#define DOT 4 /* . might start a number */
#define PUNC 8 /* Punctuators and operators */
#define QUO 0x10 /* Both flavors of quotation ",'*/
#define SPA 0x20 /* White spaces */
#define HSP 0x40
/* Horizontal white spaces (' ', '\t', TOK_SEP) */
/*
* Codes for operators used in #if expression.
* The value of the scanned operator is stored in 'openum'.
*/
/*
 * The codes are consecutive: the unary operators occupy 3..6 and the binary
 * ones 7..26, bracketed by the FIRST_xxx / LAST_xxx markers. The markers are
 * defined in terms of codes that appear after them, which is fine because
 * macros are expanded only when used.
 */
#define INV 0 /* Invalid, must be zero */
#define OP_EOE INV /* End of expression */
#define VAL 1 /* Value (operand) */
#define OP_LPA 2 /* ( */
/* The following are unary. */
#define FIRST_UNOP OP_PLU /* First unary operator */
#define OP_PLU 3 /* + */
#define OP_NEG 4 /* - */
#define OP_COM 5 /* ~ */
#define OP_NOT 6 /* ! */
#define LAST_UNOP OP_NOT /* Last unary operator */
/* The following are binary. */
/* '?' and ':' (OP_QUE, OP_COL) are numbered inside the binary range. */
#define FIRST_BINOP OP_MUL /* First binary operator */
#define OP_MUL 7 /* * */
#define OP_DIV 8 /* / */
#define OP_MOD 9 /* % */
#define OP_ADD 10 /* + */
#define OP_SUB 11 /* - */
#define OP_SL 12 /* << */
#define OP_SR 13 /* >> */
#define OP_LT 14 /* < */
#define OP_LE 15 /* <= */
#define OP_GT 16 /* > */
#define OP_GE 17 /* >= */
#define OP_EQ 18 /* == */
#define OP_NE 19 /* != */
#define OP_AND 20 /* & */
#define OP_XOR 21 /* ^ */
#define OP_OR 22 /* | */
#define OP_ANA 23 /* && */
#define OP_ORO 24 /* || */
#define OP_QUE 25 /* ? */
#define OP_COL 26 /* : */
#define LAST_BINOP OP_COL /* Last binary operator */
/* Parenthesis */
#define OP_RPA 27 /* ) */
#define OP_END 28 /* End of expression marker */
/* OP_FAIL evaluates to 29, directly below the definition-only codes. */
#define OP_FAIL (OP_END + 1) /* For error returns */
/*
* The following are operators used in macro definition only.
*/
/* In STD and POST_STD modes. */
#define OP_STR 30 /* # */
#define OP_CAT 31 /* ## */
#define OP_ELL 32 /* ... */
/*
* The following are C source operators or punctuators,
* not preprocessing operators.
* Note: "sizeof", "defined" are read as identifier for convenience.
*/
#define OP_1 33 /* Any other single byte ops or puncs */
/* =, ., ;, [, ], {, }, ',' */
#define OP_2 34 /* Any other two bytes operators */
/* &=, |=, ++, +=, --, -=, ->, %=, *=, /=, ^=, */
#define OP_3 35 /* Three bytes operators : <<=, >>= */
/*
* The following are operators spelled in digraphs.
*/
/* In STD and POST_STD modes. */
/*
 * All digraph codes have bit 0x40 set and every code above is below 0x40,
 * so OP_DIGRAPH (numerically equal to OP_LBRACE_D) works as a test mask.
 */
#define OP_LBRACE_D 0x40 /* <% i.e. { */
#define OP_RBRACE_D 0x41 /* %> i.e. } */
#define OP_LBRCK_D 0x42 /* <: i.e. [ */
#define OP_RBRCK_D 0x43 /* :> i.e. ] */
#define OP_SHARP_D 0x44 /* %: i.e. # */
#define OP_DSHARP_D 0x45 /* %:%: i.e. ## */
#define OP_DIGRAPH 0x40 /* (OP_*_D & OP_DIGRAPH) == 0x40 */
/*
* The following are for lexical scanning only.
*/
/* Token types */
/*
 * The values are not listed in numeric order: CHR (69) is grouped with the
 * basic types although WSTR (68) precedes it numerically.
 */
#define NO_TOKEN 0
#define NAM 65 /* Identifier (name, keyword) */
#define NUM 66 /* Preprocessing number */
#define STR 67 /* Character string literal */
#define CHR 69 /* Integer character constant */
/* In STD and POST_STD modes. */
#define WSTR 68 /* Wide string literal */
#define WCHR 70 /* Wide character constant */
#define OPE 71 /* Operator or punctuator */
#define SPE 72 /* Unknown token (@ or others) */
#define SEP 73 /* Token separator or magics */
/*
* The following are values of 'mcpp_debug' variable which is set by the
* arguments of '#pragma MCPP debug' directive.
*/
/* One bit per debug class, so several classes can be enabled together. */
#define PATH 1
#define TOKEN 2
#define EXPAND 4
#define MACRO_CALL 8
#define IF 16
#define EXPRESSION 32
#define GETC 64
#define MEMORY 128
/* MAC_* represent macro information types for -K option. */
#define MAC_CALL_START 1
#define MAC_CALL_END 2
#define MAC_ARG_START 3
#define MAC_ARG_END 4
/* MB_ERROR signals wrong multi-byte character sequence. */
#define MB_ERROR 0x8000
/*
 * Library build with GCC on a UNIX-family system other than Cygwin: give the
 * declarations that follow hidden visibility. The matching
 * '#pragma GCC visibility pop' sits near the end of this header, guarded by
 * the same conditions.
 */
#if MCPP_LIB && HOST_COMPILER == GNUC \
&& (SYS_FAMILY == SYS_UNIX && SYSTEM != SYS_CYGWIN)
/* For GCC 4.* on UNIXes */
#pragma GCC visibility push( hidden) /* Hide these names from outside */
#endif
/*
* The DEFBUF structure stores information about #defined macros.
* Note that DEFBUF->parmnames is parameter names catenated with commas,
* which is saved for the check of redefinition for STD mode.
* 'parmnames' and 'repl' are allocated to the area succeeding to name.
*/
/*
 * 'name' is declared with one element only and must remain the last member:
 * as the note above says, the name and the following strings live in storage
 * allocated beyond the declared end of the structure.
 */
typedef struct defbuf {
struct defbuf * link; /* Pointer to next def in chain */
short nargs; /* Number of parameters */
char * parmnames; /* -> Parameter names catenated by ',' */
char * repl; /* Pointer to replacement text */
const char * fname; /* Macro is defined in the source file */
long mline; /* at the line. */
char push; /* Push level indicator */
char name[1]; /* Macro name */
} DEFBUF;
/*
* The FILEINFO structure stores information about open files and macros
* being expanded.
*/
/*
 * Entries are chained through 'parent' (the includer). 'fp' is non-null only
 * for an entry that represents a real source file.
 */
typedef struct fileinfo {
char * bptr; /* Current pointer into buffer */
long line; /* Current line number of file */
FILE * fp; /* Source file if non-null */
long pos; /* Position next to #include */
struct fileinfo * parent; /* Link to includer */
struct ifinfo * initif; /* Initial ifstack (return there on EOF)*/
int sys_header; /* System header file or not */
int include_opt; /* Specified by -include option */
const char ** dirp; /* Include directory the file resides */
const char * src_dir; /* Directory of source file */
const char * real_fname; /* Real file name */
const char * full_fname; /* Real full path list */
char * filename; /* File/macro name (maybe changed) */
char * buffer; /* Buffer of current input line */
#if MCPP_LIB
/* Save output functions during push/pop of #includes */
/* Same signatures as the mcpp_fputc / mcpp_fputs / mcpp_fprintf pointers. */
int (* last_fputc) ( int c, OUTDEST od);
int (* last_fputs) ( const char * s, OUTDEST od);
int (* last_fprintf)( OUTDEST od, const char * format, ...);
#endif
} FILEINFO;
/*
* IFINFO stores information of conditional compilation.
*/
typedef struct ifinfo {
int stat; /* State of compilation */
long ifline; /* Line #if started */
long elseline; /* Line #else started */
} IFINFO;
/*
* These bits are set in IFINFO.stat
*/
#define WAS_COMPILING 1 /* TRUE if compile set at entry */
#define ELSE_SEEN 2 /* TRUE when #else processed */
#define TRUE_SEEN 4 /* TRUE when #if TRUE processed */
/*
 * 'compiling' is an alias for the 'stat' member of the first ifstack entry,
 * so it can be read and assigned like a variable.
 */
#define compiling ifstack[0].stat
/*
 * FP2DEST maps a stream pointer to its OUTDEST code: fp_out gives OUT,
 * fp_err gives ERR, fp_debug gives DBG and any other pointer gives
 * (OUTDEST) -1.
 * The argument and the whole expansion are parenthesized so that the macro
 * can be used safely inside a larger expression, e.g. FP2DEST(fp) == OUT.
 */
#define FP2DEST(fp) \
(((fp) == fp_out) ? OUT : \
(((fp) == fp_err) ? ERR : \
(((fp) == fp_debug) ? DBG : \
((OUTDEST) -1))))
/* VAL_SIGN structure stores information about evaluated number. */
typedef struct val_sign {
expr_t val; /* Value */
int sign; /* Signed, unsigned, error */
} VAL_SIGN;
/* Values of VAL_SIGN.sign. */
#define SIGNED 1
#define UNSIGNED 0
#define VAL_ERROR (-1)
/* LINE_COL stores information of line and column data */
typedef struct line_col {
long line;
size_t col;
} LINE_COL;
/* Value of macro_line on macro call error. */
#define MACRO_ERROR (-1L)
/*
* Values of insert_sep (flag of insertion of token separator).
* Used only in POST_STD mode.
*/
#define NO_SEP 0 /* No separator is inserted */
#define INSERT_SEP 1 /* Next get_ch() insert a separator */
#define INSERTED_SEP 2 /* Last get_ch() Inserted a separator */
/* Status codes; each one is defined only if no earlier definition exists. */
#if !defined(IO_ERROR)
/* The current errno when it is set, otherwise 1. */
#define IO_ERROR ((errno != 0) ? errno : 1)
#endif
#if !defined(IO_SUCCESS)
#define IO_SUCCESS 0
#endif
/* Non-zero when the two C strings have identical contents. */
#define str_eq(s1, s2) (!strcmp(s1, s2))
/*
* Externs
*/
/* The minimum translation limits specified by the Standards. */
/* Declaration only: the std_limits object itself is defined in another file. */
extern struct std_limits_ {
long str_len; /* Least maximum of string len. */
size_t id_len; /* Least maximum of ident len. */
int n_mac_pars; /* Least maximum of num of pars.*/
int exp_nest; /* Least maximum of expr nest */
int blk_nest; /* Least maximum of block nest */
int inc_nest; /* Least maximum of include nest*/
long n_macro; /* Least maximum of num of macro*/
long line_num; /* Maximum source line number */
} std_limits;
/* The boolean flags specified by the execution options. */
/* Declaration only: the option_flags object is defined in another file. */
extern struct option_flags_ {
int c; /* -C option (keep comments) */
int k; /* -k option (keep white spaces)*/
int z; /* -z option (no-output of included file) */
int p; /* -P option (no #line output) */
int q; /* -Q option (diag to mcpp.err) */
int v; /* -v option (verbose) */
int trig; /* -3 option (toggle trigraphs) */
int dig; /* -2 option (toggle digraphs) */
int lang_asm; /* -a option (assembler source) */
int no_source_line; /* Do not output line in diag. */
int dollar_in_name; /* Allow $ in identifiers */
} option_flags;
/* Shared state of the preprocessor; the definitions live in the .c files. */
extern int mcpp_mode; /* Mode of preprocessing */
extern int stdc_val; /* Value of __STDC__ */
extern long stdc_ver; /* Value of __STDC_VERSION__ */
extern long cplus_val; /* Value of __cplusplus for C++ */
extern int stdc2; /* cplus_val or (stdc_ver >= 199901L) */
extern int stdc3; /* (stdc_ver or cplus_val) >= 199901L */
extern int standard; /* mcpp_mode is STD or POST_STD */
extern int std_line_prefix; /* #line in C source style */
extern int warn_level; /* Level of warning */
extern int errors; /* Error counter */
extern long src_line; /* Current source line number */
extern int wrong_line; /* Force #line to compiler */
extern int newlines; /* Count of blank lines */
extern int keep_comments; /* Don't remove comments */
extern int keep_spaces; /* Don't remove white spaces */
extern int include_nest; /* Nesting level of #include */
extern const char * null; /* "" string for convenience */
extern const char ** inc_dirp; /* Directory of #includer */
extern const char * cur_fname; /* Current source file name */
extern int no_output; /* Don't output included file */
extern int in_directive; /* In process of #directive */
extern int in_define; /* In #define line */
extern int in_getarg; /* Collecting arguments of macro*/
extern int in_include; /* In #include line */
extern int in_if; /* In #if and non-skipped expr. */
extern long macro_line; /* Line number of macro call */
extern char * macro_name; /* Currently expanding macro */
extern int openum; /* Number of operator or punct. */
extern IFINFO * ifptr; /* -> current ifstack item */
extern FILEINFO * infile; /* Current input file or macro */
extern FILE * fp_in; /* Input stream to preprocess */
extern FILE * fp_out; /* Output stream preprocessed */
extern FILE * fp_err; /* Diagnostics stream */
extern FILE * fp_debug; /* Debugging information stream */
extern int insert_sep; /* Inserted token separator flag*/
extern int mkdep; /* Output source file dependency*/
extern int mbchar; /* Encoding of multi-byte char */
extern int mbchk; /* Possible multi-byte char */
extern int bsl_in_mbchar; /* 2nd byte of mbchar has '\\' */
extern int bsl_need_escape;/* '\\' in mbchar should be escaped */
extern int mcpp_debug; /* Class of debug information */
extern long in_asm; /* In #asm - #endasm block */
extern jmp_buf error_exit; /* Exit on fatal error */
extern char * cur_fullname; /* Full name of current source */
extern short * char_type; /* Character classifier */
extern char * workp; /* Free space in work[] */
extern char * const work_end; /* End of work[] buffer */
extern char identifier[]; /* Lastly scanned name */
extern IFINFO ifstack[]; /* Information of #if nesting */
extern char work_buf[];
/* Temporary buffer for directive line and macro expansion */
/*
 * NOTE(review): sh_file / sh_line carry no description here; presumably the
 * file and line of the most recent line marker - confirm in system.c.
 */
extern FILEINFO * sh_file;
extern int sh_line;
/*
 * Function prototypes, grouped by the source file that implements them.
 * The description of each function follows its declaration.
 */
/* main.c */
extern void un_predefine( int clearall);
/* Undefine predefined macros */
/* directive.c */
extern void directive( void);
/* Process #directive line */
extern DEFBUF * do_define( int ignore_redef, int predefine);
/* Do #define directive */
extern DEFBUF * look_id( const char * name);
/* Look for a #define'd thing */
extern DEFBUF ** look_prev( const char * name, int * cmp);
/* Look for place to insert def.*/
extern DEFBUF * look_and_install( const char * name, int numargs
, const char * parmnames, const char * repl);
/* Look and insert macro def. */
extern DEFBUF * install_macro( const char * name, int numargs
, const char * parmnames, const char * repl, DEFBUF ** prevp, int cmp
, int predefine);
/* Install a def to symbol table*/
extern int undefine( const char * name);
/* Delete from symbol table */
extern void dump_a_def( const char * why, const DEFBUF * dp, int newdef
, int comment, FILE * fp);
/* Dump a specific macro def */
extern void dump_def( int comment, int K_opt);
/* Dump current macro defs */
/* eval.c */
extern expr_t eval_if( void);
/* Evaluate #if expression */
extern VAL_SIGN * eval_num( const char * nump);
/* Evaluate preprocessing number*/
/* expand.c */
/* expand_macro is a function pointer, not a function. */
extern char * (* expand_macro)( DEFBUF * defp, char * out, char * out_end
, LINE_COL line_col, int * pragma_op);
/* Expand a macro completely */
extern void expand_init( int compat, int strict_ansi);
/* Initialize expand_macro() */
extern DEFBUF * is_macro( char ** cp);
/* The sequence is a macro call?*/
/* mbchar.c */
/* mb_read is a function pointer as well. */
extern size_t (* mb_read)( int c1, char ** in_pp, char ** out_pp);
/* Read mbchar sequence */
extern const char * set_encoding( char * name, char * env, int pragma);
/* Multi-byte char encoding */
extern void mb_init( void);
/* Initialize mbchar variables */
extern uexpr_t mb_eval( char ** seq_pp);
/* Evaluate mbchar in #if */
extern int last_is_mbchar( const char * in, int len);
/* The line ends with MBCHAR ? */
/* support.c */
extern int get_unexpandable( int c, int diag);
/* Get next unexpandable token */
extern void skip_nl( void);
/* Skip to the end of the line */
extern int skip_ws( void);
/* Skip over white-spaces */
extern int scan_token( int c, char ** out_pp, char * out_end);
/* Get the next token */
extern char * scan_quote( int delim, char * out, char * out_end, int diag);
/* Scan a quoted literal */
extern int id_operator( const char * name);
/* Check identifier-like ops */
extern void expanding( const char * name, int to_be_freed);
/* Save macro name expanding */
extern void clear_exp_mac( void);
/* Clear expanding macro infs */
extern int get_ch( void);
/* Get the next char from input */
extern int cnv_trigraph( char * in);
/* Do trigraph replacement */
extern int cnv_digraph( char * in);
/* Convert digraphs to usual tok*/
extern void unget_ch( void);
/* Push back the char to input */
extern FILEINFO * unget_string( const char * text, const char * name);
/* Push back the string to input*/
extern char * save_string( const char * text);
/* Stuff string in malloc mem. */
extern FILEINFO * get_file( const char * name, const char * src_dir
, const char * fullname, size_t bufsize, int include_opt);
/* New FILEINFO initialization */
/*
 * The names xmalloc and xrealloc are parenthesized, which keeps a
 * function-like macro of the same name from being expanded in these
 * declarations.
 * NOTE(review): presumably needed for the xalloc.h debugging build - confirm.
 */
extern char * (xmalloc)( size_t size);
/* Get memory or die */
extern char * (xrealloc)( char * ptr, size_t size);
/* Reallocate memory or die */
extern LINE_COL * get_src_location( LINE_COL * p_line_col);
/* Get location on source file */
/* cfatal, cerror and cwarn share one signature: a format plus three arguments. */
extern void cfatal( const char * format, const char * arg1, long arg2
, const char * arg3);
/* Print a fatal error and exit */
extern void cerror( const char * format, const char * arg1, long arg2
, const char * arg3);
/* Print an error message */
extern void cwarn( const char * format, const char * arg1, long arg2
, const char * arg3);
/* Print a warning message */
extern void dump_string( const char * why, const char * text);
/* Dump text readably */
extern void dump_unget( const char * why);
/* Dump all ungotten junk */
/* Support for alternate output mechanisms (e.g. memory buffers) */
/* Three function pointers declared in a single declaration. */
extern int (* mcpp_fputc)( int c, OUTDEST od),
(* mcpp_fputs)( const char * s, OUTDEST od),
(* mcpp_fprintf)( OUTDEST od, const char * format, ...);
/* system.c */
extern void do_options( int argc, char ** argv, char ** in_pp
, char ** out_pp);
/* Process command line args */
extern void init_sys_macro( void);
/* Define system-specific macro */
extern void at_start( void);
/* Commands prior to main input */
extern void put_depend( const char * filename);
/* Output source dependency line*/
extern int do_include( int next);
/* Process #include directive */
extern void add_file( FILE * fp, const char * src_dir
, const char * filename, const char * fullname, int include_opt);
/* Chain the included file */
extern void sharp( FILEINFO * sharp_file, int marker);
/* Output # line number */
extern void do_pragma( void);
/* Process #pragma directive */
extern void put_asm( void);
/* Putout an asm code line */
extern void do_old( void);
/* Process older directives */
extern void at_end( void);
/* Do the final commands */
extern void print_heap( void);
/* Print blocks of heap memory */
/*
 * Declared here when the host is not known to provide stpcpy(). This is also
 * the case when HAVE_STPCPY is left undefined, because HOST_HAVE_STPCPY then
 * evaluates as 0.
 */
#if ! HOST_HAVE_STPCPY
extern char * stpcpy( char * dest, const char * src);
/* Non-Standard library function*/
#endif
#if MCPP_LIB /* Setting to use mcpp as a subroutine */
/* directive.c */
extern void clear_symtable( void);
/* Free all macro definitions */
/* system.c */
extern void clear_filelist( void);
/* Free filename and directory list */
/* The following 5 functions are to Initialize static variables. */
/* directive.c */
extern void init_directive( void);
/* eval.c */
extern void init_eval( void);
/* support.c */
extern void init_support( void);
/* system.c */
extern void init_system( void);
#if HOST_COMPILER == GNUC && (SYS_FAMILY == SYS_UNIX && SYSTEM != SYS_CYGWIN)
#pragma GCC visibility pop
#endif
#endif
#if HOST_HAVE_STPCPY && !defined(stpcpy)
extern char * stpcpy( char * dest, const char * src);
#endif

1131
lib/mcpp/main.c Normal file

File diff suppressed because it is too large Load Diff

9
lib/mcpp/main_libmcpp.c Normal file
View File

@ -0,0 +1,9 @@
/*
 * Minimal stand-alone driver for libmcpp: the whole command line is handed
 * to mcpp_lib_main() and its result becomes the process exit status.
 */
#include "mcpp_lib.h"

int main( int argc, char ** argv)
{
    int     status = mcpp_lib_main( argc, argv);

    return status;
}

869
lib/mcpp/mbchar.c Normal file
View File

@ -0,0 +1,869 @@
/*-
* Copyright (c) 1998, 2002-2008 Kiyoshi Matsui <kmatsui@t3.rim.or.jp>
* All rights reserved.
*
* Some parts of this code are derived from the public domain software
* DECUS cpp (1984,1985) written by Martin Minow.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* M B C H A R . C
* C h a r a c t e r h a n d l i n g R o u t i n e s
*
* Character handling and multi-byte character handling routines are
* placed here.
*/
#if PREPROCESSED
#include "mcpp.H"
#else
#include "system.H"
#include "internal.H"
#endif
/*
* Tables of character types and multi-byte character types.
*
* Some of these character attributes will be overwritten by
* execution time option '-@post' or '-@old'.
* Warning on erroneous sequence will be issued from the caller routines:
* scan_quote(), scan_id() or scan_number().
*/
/* Non-ASCII characters are always checked by mb_read(). */
/*
 * Flag bits used by the character classification tables below.
 * Every table has UCHARMAX + 1 entries and is indexed by a byte value
 * masked with UCHARMAX. An entry is the bit-OR of the generic class flags
 * (SPA, HSP, LET, DIG, DOT, PUNC, QUO -- defined outside this file) and of
 * the encoding specific flags defined here. mb_init() points 'char_type'
 * at the table that belongs to the current encoding.
 */
#define NA 0x4000 /* Non-ASCII characters */
/* Horizontal spaces (' ', '\t' and TOK_SEP) */
#define HSPA (SPA | HSP)
short * char_type; /* Pointer to one of the following type_*[]. */
/*
 * The three EUC-style encodings share one table; each gets its own pair of
 * lead/trail bits so that mb_init() can select the pair to test at run time.
 */
#define EJ1 0x100 /* 1st byte of EUC_JP */
#define EJ2 0x200 /* 2nd byte of EUC_JP */
#define GB1 0x400 /* 1st byte of GB2312 */
#define GB2 0x800 /* 2nd byte of GB2312 */
#define KS1 0x1000 /* 1st byte of KSC5601 */
#define KS2 0x2000 /* 2nd byte of KSC5601 */
#define EJ12 (EJ1 | EJ2) /* 1st byte or 2nd byte of EUC_JP */
/* 1st byte or 2nd byte of GB2312 */
#define GB12 (GB1 | GB2)
/* 1st byte or 2nd byte of KSC5601 */
#define KS12 (KS1 | KS2)
/* Non-ASCII byte usable only as the 1st byte of EUC_JP (used for 0x8E) */
#define EJ1N (NA | EJ1)
#define EU12N (NA | EJ12 | GB12 | KS12)
/* 1st or 2nd byte of EUC_JP, GB2312 or KSC5601, or any other non-ASCII */
/*
 * type_euc[]: also used when no multi-byte encoding is active (mbchar == 0).
 * Bytes 0xA1 - 0xFE are flagged as lead-or-trail bytes of all three
 * encodings; 0x80 - 0xA0 and 0xFF are plain NA, except 0x8E which is
 * additionally an EUC_JP lead byte.
 */
static short type_euc[ UCHARMAX + 1] = {
/*
* For EUC_JP, GB2312, KSC5601 or other similar multi-byte char encodings.
*/
/* Character type codes */
/* 0, 1, 2, 3, 4, 5, 6, 7, */
/* 8, 9, A, B, C, D, E, F, Hex */
000, 000, 000, 000, 000, 000, 000, 000, /* 00 */
000, HSPA, SPA, SPA, SPA, SPA, 000, 000, /* 08 */
000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
/* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts() */
000, LET, LET, 000, 000, 000, 000, HSPA, /* 18 */
HSPA, PUNC, QUO, PUNC, 000, PUNC, PUNC, QUO, /* 20 !"#$%&' */
PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, DOT, PUNC, /* 28 ()*+,-./ */
DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */
DIG, DIG, PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, /* 38 89:;<=>? */
000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */
LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */
LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */
LET, LET, LET, PUNC, 000, PUNC, PUNC, LET, /* 58 XYZ[\]^_ */
000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */
LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */
LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */
LET, LET, LET, PUNC, PUNC, PUNC, PUNC, 000, /* 78 xyz{|}~ */
NA, NA, NA, NA, NA, NA, NA, NA, /* 80 .. 87 */
NA, NA, NA, NA, NA, NA, EJ1N, NA, /* 88 .. 8F */
NA, NA, NA, NA, NA, NA, NA, NA, /* 90 .. 97 */
NA, NA, NA, NA, NA, NA, NA, NA, /* 98 .. 9F */
NA, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* A0 .. A7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* A8 .. AF */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* B0 .. B7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* B8 .. BF */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* C0 .. C7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* C8 .. CF */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* D0 .. D7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* D8 .. DF */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* E0 .. E7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* E8 .. EF */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, /* F0 .. F7 */
EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, EU12N, NA, /* F8 .. FF */
};
/*
 * type_bsl[]: classification table selected by mb_init() for SJIS and
 * BIGFIVE. Unlike type_euc[], the ASCII bytes 0x40 - 0x7E (which include
 * '\\', 0x5C) also carry the trail-byte flags SJ2 | BF2, so a byte that looks
 * like a backslash may really be the second half of a multi-byte character.
 * The flag macros below are defined inside the initializer; being
 * preprocessor definitions they stay visible for the rest of the file.
 */
static short type_bsl[ UCHARMAX + 1] = {
/*
* For SJIS, BIGFIVE or other similar encodings which may have '\\' value as
* the second byte of multi-byte character.
*/
#define SJ1 0x100 /* 1st byte of SJIS */
#define SJ2 0x200 /* 2nd byte of SJIS */
#define BF1 0x400 /* 1st byte of BIGFIVE */
#define BF2 0x800 /* 2nd byte of BIGFIVE */
/* Naming of the combinations: S = SJIS, B = BIGFIVE, 1/2 = lead/trail, */
/* trailing N = non-ASCII, leading L/P = also a letter / punctuator. */
#define SB2 (SJ2 | BF2)
#define SJ2N (NA | SJ2)
#define SB2N (NA | SJ2 | BF2)
#define SJ12N (NA | SJ1 | SJ2)
#define BF12N (NA | BF1 | BF2)
#define SB12N (NA | SJ1 | SJ2 | BF1 | BF2)
#define S2B12N (NA | SJ2 | BF1 | BF2)
#define LSB2 (LET | SB2)
#define PSB2 (PUNC| SB2)
/* Character type codes */
/* 0, 1, 2, 3, 4, 5, 6, 7, */
/* 8, 9, A, B, C, D, E, F, Hex */
000, 000, 000, 000, 000, 000, 000, 000, /* 00 */
000, HSPA, SPA, SPA, SPA, SPA, 000, 000, /* 08 */
000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
/* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts() */
000, LET, LET, 000, 000, 000, 000, HSPA, /* 18 */
HSPA, PUNC, QUO, PUNC, 000, PUNC, PUNC, QUO, /* 20 !"#$%&' */
PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, DOT, PUNC, /* 28 ()*+,-./ */
DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */
DIG, DIG, PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, /* 38 89:;<=>? */
SB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 40 @ABCDEFG */
LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 48 HIJKLMNO */
LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 50 PQRSTUVW */
LSB2, LSB2, LSB2, PSB2, SB2, PSB2, PSB2, LSB2, /* 58 XYZ[\]^_ */
SB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 60 `abcdefg */
LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 68 hijklmno */
LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, LSB2, /* 70 pqrstuvw */
LSB2, LSB2, LSB2, PSB2, PSB2, PSB2, PSB2, 000, /* 78 xyz{|}~ */
SB2N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, /* 80 .. 87 */
SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, /* 88 .. 8F */
SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, /* 90 .. 97 */
SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, SJ12N, /* 98 .. 9F */
SJ2N, S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* A0 .. A7 */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* A8 .. AF */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* B0 .. B7 */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* B8 .. BF */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* C0 .. C7 */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* C8 .. CF */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* D0 .. D7 */
S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N,S2B12N, /* D8 .. DF */
SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, /* E0 .. E7 */
SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, /* E8 .. EF */
SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, SB12N, /* F0 .. F7 */
SB12N, SB12N, SB12N, SB12N, SB12N, BF12N, BF12N, NA, /* F8 .. FF */
};
/*
* For ISO2022_JP multi-byte character encoding.
*/
/*
 * Flags for the ISO2022_JP table. IS1..IS4 mark the bytes that may appear
 * at positions 1..4 of a shift (escape) sequence; IJP marks the 7-bit bytes
 * that form character pairs once such a sequence has been seen (see
 * mb_read_iso2022_jp()).
 */
#define IS1 0x100 /* 1st byte of shift-sequence */
#define IS2 0x200 /* 2nd byte of shift-sequence */
#define IS3 0x400 /* 3rd byte of shift-sequence */
#define IS4 0x800 /* 4th byte of shift-sequence */
#define IJP 0x1000 /* 1st or 2nd byte of ISO-2022-JP (ISO-2022-JP1) */
/* Ordinary ASCII classes that may also be a byte of an ISO-2022-JP pair */
#define PIJP (PUNC | IJP)
#define QIJP (QUO | IJP)
#define DTJP (DOT | IJP)
#define DGJP (DIG | IJP)
#define LIJP (LET | IJP)
/* Bytes that additionally take part in shift sequences: */
/* '$' (0x24), '(' (0x28), 'B' (0x42) and 'D' (0x44) respectively */
#define JPS2 (IJP | IS2)
#define PJPS23 (PIJP | IS2 | IS3)
#define LJPS3 (LIJP | IS3)
#define LJPS4 (LIJP | IS4)
/*
 * type_iso2022_jp[]: only 0x1B carries IS1, i.e. it is the sole byte that
 * can start a multi-byte run in this table. All bytes >= 0x80 are plain NA.
 */
static short type_iso2022_jp[ UCHARMAX + 1] = {
/* Character type codes */
/* 0, 1, 2, 3, 4, 5, 6, 7, */
/* 8, 9, A, B, C, D, E, F, Hex */
000, 000, 000, 000, 000, 000, 000, 000, /* 00 */
000, HSPA, SPA, SPA, SPA, SPA, 000, 000, /* 08 */
000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
/* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts() */
000, LET, LET, IS1, 000, 000, 000, HSPA, /* 18 */
HSPA, PIJP, QIJP, PIJP, JPS2, PIJP, PIJP, QIJP, /* 20 !"#$%&' */
PJPS23,PIJP, PIJP, PIJP, PIJP, PIJP, DTJP, PIJP, /* 28 ()*+,-./ */
DGJP, DGJP, DGJP, DGJP, DGJP, DGJP, DGJP, DGJP, /* 30 01234567 */
DGJP, DGJP, PIJP, PIJP, PIJP, PIJP, PIJP, PIJP, /* 38 89:;<=>? */
IJP, LIJP, LJPS3, LIJP, LJPS4, LIJP, LIJP, LIJP, /* 40 @ABCDEFG */
LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, /* 48 HIJKLMNO */
LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, /* 50 PQRSTUVW */
LIJP, LIJP, LIJP, PIJP, IJP, PIJP, PIJP, LIJP, /* 58 XYZ[\]^_ */
IJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, /* 60 `abcdefg */
LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, /* 68 hijklmno */
LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, LIJP, /* 70 pqrstuvw */
LIJP, LIJP, LIJP, PIJP, PIJP, PIJP, PIJP, 000, /* 78 xyz{|}~ */
NA, NA, NA, NA, NA, NA, NA, NA, /* 80 .. 87 */
NA, NA, NA, NA, NA, NA, NA, NA, /* 88 .. 8F */
NA, NA, NA, NA, NA, NA, NA, NA, /* 90 .. 97 */
NA, NA, NA, NA, NA, NA, NA, NA, /* 98 .. 9F */
NA, NA, NA, NA, NA, NA, NA, NA, /* A0 .. A7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* A8 .. AF */
NA, NA, NA, NA, NA, NA, NA, NA, /* B0 .. B7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* B8 .. BF */
NA, NA, NA, NA, NA, NA, NA, NA, /* C0 .. C7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* C8 .. CF */
NA, NA, NA, NA, NA, NA, NA, NA, /* D0 .. D7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* D8 .. DF */
NA, NA, NA, NA, NA, NA, NA, NA, /* E0 .. E7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* E8 .. EF */
NA, NA, NA, NA, NA, NA, NA, NA, /* F0 .. F7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* F8 .. FF */
};
/*
* For UTF8 multi-byte character encoding.
*/
/*
 * Flags for the UTF8 table: one bit per possible sequence length on the
 * lead byte, plus one bit for continuation bytes.
 */
#define U2_1 0x100 /* 1st byte of 2-byte encoding of UTF8 */
#define U3_1 0x200 /* 1st byte of 3-byte encoding of UTF8 */
#define U4_1 0x400 /* 1st byte of 4-byte encoding of UTF8 */
#define UCONT 0x800 /* Continuation of a 2, 3, or 4 byte UTF8 sequence */
/* The same four flags combined with NA, as stored in the table */
#define U2_1N (NA | U2_1)
#define U3_1N (NA | U3_1)
#define U4_1N (NA | U4_1)
#define UCONTN (NA | UCONT)
/*
 * type_utf8[]: 0x80 - 0xBF are continuation bytes, 0xC2 - 0xDF start a
 * 2-byte, 0xE0 - 0xEF a 3-byte and 0xF0 - 0xF4 a 4-byte sequence.
 * 0xC0, 0xC1 and 0xF5 - 0xFF are plain NA, so they never start a sequence.
 */
static short type_utf8[ UCHARMAX + 1] = {
/* Character type codes */
/* 0, 1, 2, 3, 4, 5, 6, 7, */
/* 8, 9, A, B, C, D, E, F, Hex */
000, 000, 000, 000, 000, 000, 000, 000, /* 00 */
000, HSPA, SPA, SPA, SPA, SPA, 000, 000, /* 08 */
000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
/* 0x17-0x1A and 0x1F will be cleared in some modes by chk_opts() */
000, LET, LET, 000, 000, 000, 000, HSPA, /* 18 */
HSPA, PUNC, QUO, PUNC, 000, PUNC, PUNC, QUO, /* 20 !"#$%&' */
PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, DOT, PUNC, /* 28 ()*+,-./ */
DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */
DIG, DIG, PUNC, PUNC, PUNC, PUNC, PUNC, PUNC, /* 38 89:;<=>? */
000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */
LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */
LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */
LET, LET, LET, PUNC, 000, PUNC, PUNC, LET, /* 58 XYZ[\]^_ */
000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */
LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */
LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */
LET, LET, LET, PUNC, PUNC, PUNC, PUNC, 000, /* 78 xyz{|}~ */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* 80 .. 87 */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* 88 .. 8F */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* 90 .. 97 */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* 98 .. 9F */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* A0 .. A7 */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* A8 .. AF */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* B0 .. B7 */
UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN,UCONTN, /* B8 .. BF */
NA, NA, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, /* C0 .. C7 */
U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, /* C8 .. CF */
U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, /* D0 .. D7 */
U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, U2_1N, /* D8 .. DF */
U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, /* E0 .. E7 */
U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, U3_1N, /* E8 .. EF */
U4_1N, U4_1N, U4_1N, U4_1N, U4_1N, NA, NA, NA, /* F0 .. F7 */
NA, NA, NA, NA, NA, NA, NA, NA, /* F8 .. FF */
};
/* Value of set_encoding()'s 'pragma' argument that means #pragma setlocale */
#define SETLOCALE 2 /* #pragma setlocale (not __setlocale) */
/* Dimensions of encoding_name[][]: rows are encodings, columns are aliases */
#define NUM_ENCODING 8
#define NUM_ALIAS 6
/* Names of encoding recognized. Table for search_encoding(). */
/*
 * The row index is what search_encoding() maps to 'mbchar':
 * 0: none, 1: EUC_JP, 2: GB2312, 3: KSC5601, 4: SJIS, 5: BIGFIVE,
 * 6: ISO2022_JP, 7: UTF8.
 * Columns 0 and 1 hold the Visual C names; set_encoding() starts the search
 * at column 2 unless it handles '#pragma setlocale' in an MSC build.
 * All names are lower case without '_', '-' or '.', matching the normalized
 * form produced by set_encoding(). "" presumably marks an unused alias slot.
 */
static const char * const encoding_name[ NUM_ENCODING][ NUM_ALIAS] = {
/* Visual C full, Visual C short
, 4 miscellaneous */
{ "english", "c"
, "c", "en", "latin", "iso8859"},
{ "", ""
, "eucjp", "euc", "ujis", ""},
{ "chinesesimplified", "chs"
, "gb2312", "cngb", "euccn", ""},
{ "korean", "kor"
, "ksc5601", "ksx1001", "wansung", "euckr"},
{ "japanese", "jpn"
, "sjis", "shiftjis", "mskanji", ""},
{ "chinesetraditional", "cht"
, "bigfive", "big5", "cnbig5", "euctw"},
{ "", ""
, "iso2022jp", "iso2022jp1", "jis", ""},
{ "", ""
, "utf8", "utf", "", ""},
};
/*
 * Flag masks tested against char_type[] entries; both are (re)assigned by
 * mb_init() according to the current encoding.
 */
/* Bit(s) that mark a byte able to start a multi-byte character */
static int mbstart;
/* Bit that marks a valid 2nd byte; read only by mb_read_2byte() */
static int mb2;
static size_t mb_read_2byte( int c1, char ** in_pp, char ** out_pp);
/* For 2-byte encodings of mbchar */
static const char * search_encoding( char * norm, int alias);
/* Search encoding_name[][] table */
static void strip_bar( char * string);
/* Remove '_', '-' or '.' in the string */
static void conv_case( char * name, char * lim, int upper);
/* Convert to upper/lower case */
static size_t mb_read_iso2022_jp( int c1, char ** in_pp, char ** out_pp);
/* For ISO2022_JP encoding */
static size_t mb_read_utf8( int c1, char ** in_pp, char ** out_pp);
/* For UTF8 mbchar encoding */
/* Size of set_encoding()'s buffer for the normalized encoding name */
#define NAMLEN 20
/* Values for the 'upper' argument of conv_case() */
#define UPPER 1 /* To upper */
#define LOWER 0 /* To lower */
const char * set_encoding(
char * name, /* Name of encoding specified */
char * env, /* Name of environment variable */
int pragma
/* 2: #pragma setlocale, 1: #pragma __setlocale, 0: not #pragma */
)
/*
* Search the encoding specified and re-initialize mbchar settings.
*/
{
const char * unknown_encoding
= "Unknown encoding: %s%.0ld%.0s"; /* _W1_ */
const char * too_long
= "Too long encoding name: %s%.0ld%.0s"; /* _E_ */
const char * loc = "";
int alias;
char norm[ NAMLEN];
/*
* Normalized name (removed 'xxxxx.', stripped '_', '-', '.'
* and lowered.
*/
if (strlen( name) >= NAMLEN) {
if ((env || pragma) && (warn_level & 1)) {
cwarn( too_long, name, 0L, NULL);
} else {
mcpp_fprintf( ERR, too_long, name);
mcpp_fputc( '\n', ERR);
}
}
strcpy( norm, name);
if (norm[ 5] == '.')
memmove( norm, norm + 5, strlen( norm + 5) + 1);
/* Remove initial 'xxxxx.' as 'ja_JP.', 'en_US.' or any other */
conv_case( norm, norm + strlen( norm), LOWER);
strip_bar( norm);
if (strlen( name) == 0) { /* "" */
mbchar = MBCHAR; /* Restore to the default encoding */
} else if (memcmp( norm, "iso8859", 7) == 0 /* iso8859* */
|| memcmp( norm, "latin", 5) == 0 /* latin* */
|| memcmp( norm, "en", 2) == 0) { /* en* */
mbchar = 0; /* No multi-byte character */
} else {
alias = 2;
#if COMPILER == MSC
if (pragma == SETLOCALE) /* #pragma setlocale */
alias = 0;
#endif
loc = search_encoding( norm, alias); /* Search the name */
}
if (loc == NULL) {
if ((env || pragma) && (warn_level & 1)) {
cwarn( unknown_encoding, name, 0L, NULL);
} else { /* -m option */
mcpp_fprintf( ERR, unknown_encoding, name);
mcpp_fputc( '\n', ERR);
}
} else {
mb_init(); /* Re-initialize */
}
return loc;
}
static const char * search_encoding(
char * norm, /* The name of encoding specified */
int alias /* The number of alias to start searching */
)
{
const char * loc;
int lo, al;
for (lo = 0; lo < NUM_ENCODING; lo++) {
for (al = alias ; al < NUM_ALIAS; al++) {
loc = encoding_name[ lo][ al];
if (str_eq( loc, norm)) {
switch (lo) {
case 0 : mbchar = 0; break;
case 1 : mbchar = EUC_JP; break;
case 2 : mbchar = GB2312; break;
case 3 : mbchar = KSC5601; break;
case 4 : mbchar = SJIS; break;
case 5 : mbchar = BIGFIVE; break;
case 6 : mbchar = ISO2022_JP; break;
case 7 : mbchar = UTF8; break;
}
return loc;
}
}
}
return NULL;
}
static void strip_bar(
    char *  string
)
/*
 * Compact the string in place: every '_', '-' and '.' is dropped and the
 * remaining characters keep their order.
 */
{
    const char *    rd = string;        /* Next character to examine    */
    char *          wr = string;        /* Where the next kept one goes */
    char            ch;

    while ((ch = *rd++) != EOS) {
        if (ch != '_' && ch != '-' && ch != '.')
            *wr++ = ch;
    }
    *wr = EOS;
}
static void conv_case(
    char *  name,                       /* (directory) Name         */
    char *  lim,                        /* End of (directory) name  */
    int     upper                       /* TRUE if to upper         */
)
/*
 * Convert the characters in [name, lim) to upper-case or lower-case letters
 * in-place.  When multi-byte support is compiled in (MBCHAR), a run of
 * multi-byte characters is stepped over with mb_read() and left unchanged.
 *
 * mb_read() leaves the pointer on the first byte it did not consume, so that
 * byte must be examined by the next iteration rather than skipped; the loop
 * therefore advances explicitly instead of incrementing unconditionally.
 */
{
    int     c;
    char *  sp = name;

    while (sp < lim) {
        c = *sp & UCHARMAX;
#if MBCHAR
        if ((char_type[ c] & mbstart)) {
            char    tmp[ PATHMAX+1];    /* Scratch output of mb_read()  */
            char *  tp = tmp;

            *tp++ = *sp++;
            mb_read( c, &sp, &tp);
            continue;       /* sp is already on the next byte to examine */
        }
#endif
        if (upper)
            *sp = toupper( c);
        else
            *sp = tolower( c);
        sp++;
    }
}
void    mb_init( void)
/*
 * Initialize multi-byte character settings.
 * First called prior to setting the 'mcpp_mode'.
 * Will be called again each time the multibyte character encoding is changed.
 *
 * For the encoding in 'mbchar' this selects
 *   char_type       : the character classification table,
 *   mb_read         : the multi-byte character reading routine,
 *   bsl_in_mbchar   : whether a multi-byte character may contain 0x5c,
 *   mbstart, mb2    : the table bits for the 1st / 2nd byte,
 *   mbchk           : the table bits that trigger multi-byte checking,
 *   bsl_need_escape : the supplement for some compiler's deficiency.
 * An unrecognized 'mbchar' value leaves everything but bsl_need_escape as
 * it was.
 */
{
    /*
     * Start from "no escaping needed" on every call.  Only the encodings
     * compiled as not escape-free turn it on below; without this reset a TRUE
     * left by a previous encoding would survive a switch to an escape-free one.
     */
    bsl_need_escape = FALSE;

    switch (mbchar) {
    case 0          :
        char_type = type_euc;
        bsl_in_mbchar = FALSE;
        mb_read = mb_read_2byte;
        mbstart = 0;                    /* Nothing starts an mbchar */
        mb2 = 0;
        mbchk = 0;
        break;
    case EUC_JP     :
        char_type = type_euc;
        bsl_in_mbchar = FALSE;
        mb_read = mb_read_2byte;
        mbstart = EJ1;
        mb2 = EJ2;
        mbchk = NA;
        break;
    case GB2312     :
        char_type = type_euc;
        bsl_in_mbchar = FALSE;
        mb_read = mb_read_2byte;
        mbstart = GB1;
        mb2 = GB2;
        mbchk = NA;
        break;
    case KSC5601    :
        char_type = type_euc;
        bsl_in_mbchar = FALSE;
        mb_read = mb_read_2byte;
        mbstart = KS1;
        mb2 = KS2;
        mbchk = NA;
        break;
    case SJIS       :
        char_type = type_bsl;
        bsl_in_mbchar = TRUE;
        mb_read = mb_read_2byte;
        mbstart = SJ1;
        mb2 = SJ2;
        mbchk = NA;
#if ! SJIS_IS_ESCAPE_FREE
        bsl_need_escape = TRUE;
#endif
        break;
    case BIGFIVE    :
        char_type = type_bsl;
        bsl_in_mbchar = TRUE;
        mb_read = mb_read_2byte;
        mbstart = BF1;
        mb2 = BF2;
        mbchk = NA;
#if ! BIGFIVE_IS_ESCAPE_FREE
        bsl_need_escape = TRUE;
#endif
        break;
    case ISO2022_JP :
        char_type = type_iso2022_jp;
        bsl_in_mbchar = TRUE;
        mb_read = mb_read_iso2022_jp;
        mbstart = IS1;
        mb2 = 0;                        /* Not used by this reader  */
        mbchk = (IS1 | NA);
#if ! ISO2022_JP_IS_ESCAPE_FREE
        bsl_need_escape = TRUE;
#endif
        break;
    case UTF8       :
        char_type = type_utf8;
        bsl_in_mbchar = FALSE;
        mb_read = mb_read_utf8;
        mbstart = (U2_1 | U3_1 | U4_1);
        mb2 = 0;                        /* Not used by this reader  */
        mbchk = NA;
        break;
    }

    /*
     * Modify magic characters in character type table.
     * char_type[] table should be rewritten in accordance with the 'mcpp_mode'
     * whenever the encoding is changed.
     */
    if (mcpp_mode) {                /* If mcpp_mode is already set  */
        char_type[ DEF_MAGIC] = standard ? LET : 0;
        char_type[ IN_SRC] = (mcpp_mode == STD) ? LET : 0;
        char_type[ TOK_SEP] = (mcpp_mode == STD || mcpp_mode == OLD_PREP)
                ? HSPA: 0;          /* TOK_SEP equals to COM_SEP    */
    }
}
static size_t   mb_read_2byte(
    int     c1,         /* The 1st byte of mbchar sequence (already read)   */
    char ** in_pp,      /* Pointer to input     */
    char ** out_pp      /* Pointer to output    */
)
/*
 * Reader for the 2-byte encodings.  Starting after the lead byte 'c1',
 * copy a run of consecutive 2-byte characters from *in_pp to *out_pp.
 * On return *in_pp points at the first byte that was not accepted, the
 * output is EOS-terminated and *out_pp points at that EOS.
 * Returns the number of characters copied, OR-ed with MB_ERROR if a lead
 * byte was followed by an invalid 2nd byte; returns MB_ERROR alone if 'c1'
 * is not a lead byte at all.
 */
{
    char *  src = *in_pp;
    char *  dst = *out_pp;
    size_t  pairs = 0;              /* Complete characters copied so far    */
    int     bad = FALSE;
    int     ch;

    if ((char_type[ c1 & UCHARMAX] & mbstart) == 0)
        return  MB_ERROR;           /* Not a multi-byte character   */

    for (;;) {
        ch = *dst++ = *src++;               /* Expected 2nd byte    */
        if ((char_type[ ch & UCHARMAX] & mb2) == 0) {
            bad = TRUE;
            break;
        }
        pairs++;
        ch = *dst++ = *src++;               /* Possibly the next lead byte  */
        if ((char_type[ ch & UCHARMAX] & mbstart) == 0)
            break;
    }

    /* The byte copied last was not accepted: give it back on both sides.   */
    src--;
    dst--;
    *in_pp = src;
    *dst = EOS;
    *out_pp = dst;
    return  bad ? (pairs | MB_ERROR) : pairs;
}
static size_t   mb_read_iso2022_jp(
    int     c1, /* The 1st byte of the sequence already read (always 0x1b). */
    char ** in_pp,
    char ** out_pp
)
/*
 * Multi-byte character reading routine for ISO2022_JP.
 * Starting after the escape byte 'c1', copy shift sequences and the 2-byte
 * characters that follow them from *in_pp to *out_pp.
 * On return *in_pp points at the last byte read that was not accepted, the
 * output is EOS-terminated and *out_pp points at that EOS.
 * Returns the number of 2-byte characters copied, OR-ed with MB_ERROR if the
 * sequence is malformed; returns MB_ERROR alone if 'c1' does not start a
 * shift sequence.
 *
 * Every byte used as an index into char_type[] is masked with UCHARMAX
 * before it is kept in an int, so bytes >= 0x80 cannot become a negative
 * index on hosts where plain char is signed.
 */
{
    int     error = FALSE;
    size_t  len = 0;
    char *  in_p = *in_pp;
    char *  out_p = *out_pp;
    int     c2, c3, c4;
    int     pair2;                  /* 2nd byte of a 2-byte character   */

    if (! (char_type[ c1 & UCHARMAX] & mbstart))
        return  MB_ERROR;

    do {
        *out_p++ = c2 = *in_p++;
        if (! (char_type[ c2 & UCHARMAX] & IS2)) {
            error = TRUE;
            break;
        }
        *out_p++ = c3 = *in_p++;
        if (! (char_type[ c3 & UCHARMAX] & IS3)) {
            error = TRUE;
            break;
        }

        switch (c2) {
        case 0x24   :
            switch (c3) {
            case 0x42   :   /* 0x1b 0x24 0x42: JIS X 0208-1983  */
                break;
            case 0x28   :
                *out_p++ = c4 = *in_p++;
                if (! (char_type[ c4 & UCHARMAX] & IS4))
                    error = TRUE;
                /* else:    0x1b 0x24 0x28 0x44: JIS X 0212     */
                break;
            default :
                error = TRUE;
            }
            break;
        case 0x28   :
            switch (c3) {
            case 0x42   :   /* 0x1b 0x28 0x42: ASCII    */
                c1 = *in_p++ & UCHARMAX;
                *out_p++ = c1;
                continue;   /* Go to the loop test: another escape?     */
            default :
                error = TRUE;
            }
            break;
        }
        if (error)
            break;

        /* Copy the 2-byte characters that follow the shift sequence.   */
        for (;;) {
            c1 = *in_p++ & UCHARMAX;
            *out_p++ = c1;
            if (! (char_type[ c1] & IJP))
                break;              /* End of the run; c1 is tested below   */
            pair2 = *in_p++ & UCHARMAX;
            *out_p++ = pair2;
            if (! (char_type[ pair2] & IJP)) {
                error = TRUE;
                break;
            }
            len++;          /* String of multi-byte characters  */
        }
        if (error)
            break;
    } while (char_type[ c1] & IS1);     /* 0x1b: start of shift-sequence    */

    *in_pp = --in_p;
    *(--out_p) = EOS;
    *out_pp = out_p;
    return  error ? (len | MB_ERROR) : len;
}
static size_t   mb_read_utf8(
    int     c1,
    char ** in_pp,
    char ** out_pp
)
/*
 * Reader for UTF8.  Starting after the lead byte 'c1', copy a run of
 * consecutive 2-, 3- or 4-byte sequences from *in_pp to *out_pp, decoding
 * each one far enough to reject overlong forms, values beyond 0x10FFFF,
 * surrogates (0xD800 - 0xDFFF) and 0xFFFE / 0xFFFF.
 * On return *in_pp points at the last byte read that was not accepted, the
 * output is EOS-terminated and *out_pp points at that EOS.
 * Returns the number of sequences copied, OR-ed with MB_ERROR if a sequence
 * is invalid; returns MB_ERROR alone if 'c1' is not a lead byte.
 */
{
    char *  src = *in_pp;
    char *  dst = *out_pp;
    size_t  count = 0;
    int     bad = FALSE;

    if ((char_type[ c1 & UCHARMAX] & mbstart) == 0)
        return  MB_ERROR;

    for (;;) {
        unsigned int    cp;             /* Code point being assembled   */
        int     seq_len, remain;
        int     lead = char_type[ c1 & UCHARMAX];

        if ((lead & U4_1) == U4_1)
            seq_len = 4;
        else if ((lead & U3_1) == U3_1)
            seq_len = 3;
        else if ((lead & U2_1) == U2_1)
            seq_len = 2;

        /* Payload bits of the lead byte: 5, 4 or 3 of them */
        cp = ((2 << (6 - seq_len)) - 1) & c1;

        /* Each remaining byte must be a continuation byte (0x80 - 0xBF)    */
        remain = seq_len - 1;
        while (remain != 0 && ! bad) {
            cp = (cp << 6) + (*src & 0x3fU);
            *dst = *src;
            if ((char_type[ *dst & UCHARMAX] & UCONT) == 0)
                bad = TRUE;
            dst++;
            src++;
            remain--;
        }

        /* The value must need exactly this many bytes  */
        switch (seq_len) {
        case 2  :
            if (cp < 0x80 || cp > 0x7FF)
                bad = TRUE;
            break;
        case 3  :
            if (cp < 0x800 || cp > 0xFFFF)
                bad = TRUE;
            break;
        case 4  :
            if (cp < 0x10000 || cp > 0x10FFFF)
                bad = TRUE;
            break;
        }
        /* Reserved surrogates and the two illegal values   */
        if ((0xD800 <= cp && cp <= 0xDFFF) || (0xFFFE <= cp && cp <= 0xFFFF))
            bad = TRUE;
        if (bad)
            break;
        count++;

        /* Fetch the byte after the sequence; go on if it is a lead byte    */
        c1 = *src++;
        *dst++ = c1;
        if ((char_type[ c1 & UCHARMAX] & mbstart) == 0)
            break;
    }

    /* The byte copied last was not accepted: give it back on both sides.   */
    *in_pp = --src;
    *(--dst) = EOS;
    *out_pp = dst;
    return  bad ? (count | MB_ERROR) : count;
}
uexpr_t mb_eval(
char ** seq_pp
)
/*
* Evaluate the value of a multi-byte character.
* This routine does not check the legality of the sequence.
* This routine is called from eval_char().
* This routine is never called in POST_STD mode.
*
* *seq_pp points at the first byte of the character and is advanced past
* every byte consumed. The result is the consumed bytes combined with the
* first byte most significant, or the byte itself when it does not start a
* multi-byte character in the current encoding.
*/
{
char * seq = *seq_pp;
uexpr_t val = 0;
int c, c1;
if (! (char_type[ c = *seq++ & UCHARMAX] & mbstart)) {
*seq_pp = seq;
return c; /* Not a multi-byte character */
}
switch (mbchar) {
case EUC_JP :
case GB2312 :
case KSC5601:
case SJIS :
case BIGFIVE:
val = (c << 8) + (*seq++ & UCHARMAX);
/* Evaluate the 2-byte sequence */
break;
case ISO2022_JP :
/*
* 'c' is overwritten while the shift sequence is parsed: after a complete
* sequence it holds the first byte of the character that follows. If the
* 2nd or 3rd byte is not a valid shift-sequence byte, parsing stops early
* and 'c' holds the byte read last (legality is not checked here).
*/
if (char_type[ c & UCHARMAX] & IS1) { /* Skip shift-sequence */
if (char_type[ c = *seq++ & UCHARMAX] & IS2) {
if (char_type[ c1 = *seq++ & UCHARMAX] & IS3) {
/* A 3rd byte of 0x28 means a 4-byte sequence: skip the 4th byte too */
if (c1 == 0x28)
seq++;
if (c == 0x28 && c1 == 0x42) { /* Shift-out sequence */
val = 0;
/* This break leaves the switch: no character follows to be evaluated */
break;
}
c = *seq++ & UCHARMAX;
}
}
}
val = (c << 8) + (*seq++ & UCHARMAX); /* Evaluate the 2-bytes */
break;
case UTF8 : /* Evaluate the sequence of 2, 3 or 4 bytes as it is */
/* The raw bytes are concatenated; the code point is not decoded */
val = (c << 8) + (*seq++ & UCHARMAX);
if (char_type[ c & UCHARMAX] & U3_1) {
val = (val << 8) + (*seq++ & UCHARMAX);
} else if (char_type[ c & UCHARMAX] & U4_1) {
val = (val << 8) + (*seq++ & UCHARMAX);
val = (val << 8) + (*seq++ & UCHARMAX);
}
break;
}
*seq_pp = seq;
return val;
}
int     last_is_mbchar(
    const char *  in,               /* Input physical line          */
    int     len                     /* Length of the line minus 2   */
)
/*
 * Return 2, if the last char of the line is second byte of SJIS or BIGFIVE,
 * else return 0.
 *
 * in[ len] is the char before '\n'.  The bytes in front of it are scanned
 * backward for as long as they could be the first byte of a multi-byte
 * character; an odd number of them means in[ len] is a second byte.
 * The scan is done by index so that no pointer before 'in' is ever formed.
 */
{
    int     run = 0;        /* Consecutive lead-capable bytes before in[ len] */
    int     i;

    /* NOTE(review): this mask test assumes the encoding codes are laid out */
    /* so that only SJIS and BIGFIVE share bits with (SJIS | BIGFIVE)       */
    /* -- confirm against their definitions.                                */
    if ((mbchar & (SJIS | BIGFIVE)) == 0)
        return  0;

    for (i = len - 1; i >= 0; i--) {            /* Search backwardly    */
        if ((char_type[ in[ i] & UCHARMAX] & mbstart) == 0)
            break;                  /* Not the first byte of MBCHAR */
        run++;
    }
    return  (run & 1) ? 2 : 0;
}

31
lib/mcpp/mcpp_lib.h Normal file
View File

@ -0,0 +1,31 @@
/* mcpp_lib.h: declarations of libmcpp exported (visible) functions */
#ifndef _MCPP_LIB_H
#define _MCPP_LIB_H

#ifndef _MCPP_OUT_H
#include "mcpp_out.h"           /* declaration of OUTDEST   */
#endif

/*
 * DLL_DECL: import/export decoration for Windows-like targets.
 * Define DLL_EXPORT when building libmcpp as a DLL and DLL_IMPORT when
 * linking against that DLL; leave both undefined for a static library.
 * It expands to nothing everywhere else.
 */
#if     _WIN32 || _WIN64 || __CYGWIN__ || __CYGWIN64__ || __MINGW32__   \
            || __MINGW64__
#if     DLL_EXPORT || (__CYGWIN__ && PIC)
#define DLL_DECL    __declspec( dllexport)
#elif   DLL_IMPORT
#define DLL_DECL    __declspec( dllimport)
#else
#define DLL_DECL
#endif
#else
#define DLL_DECL
#endif

/*
 * libmcpp is compiled as C.  Give the declarations C linkage when the header
 * is included from C++ so that the names are not mangled.  Including it from
 * inside an existing extern "C" block remains valid.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* Run the preprocessor with a command line, like the stand-alone program */
extern DLL_DECL int     mcpp_lib_main( int argc, char ** argv);
/* Restore the default output functions */
extern DLL_DECL void    mcpp_reset_def_out_func( void);
/* Install caller-supplied replacements for the three output functions */
extern DLL_DECL void    mcpp_set_out_func(
                    int (* func_fputc)  ( int c, OUTDEST od),
                    int (* func_fputs)  ( const char * s, OUTDEST od),
                    int (* func_fprintf)( OUTDEST od, const char * format, ...)
                    );
/* Switch output to memory buffers on (tf != 0) or off -- presumably; */
/* confirm the exact meaning of 'tf' in the implementation */
extern DLL_DECL void    mcpp_use_mem_buffers( int tf);
/* Fetch the memory buffer that collected the output for destination 'od' */
extern DLL_DECL char *  mcpp_get_mem_buffer( OUTDEST od);

#ifdef __cplusplus
}
#endif

#endif  /* _MCPP_LIB_H  */

13
lib/mcpp/mcpp_out.h Normal file
View File

@ -0,0 +1,13 @@
/* mcpp_out.h: declarations of OUTDEST data types for MCPP */
#ifndef _MCPP_OUT_H
#define _MCPP_OUT_H
/* Choices for output destination */
/*
 * OUTDEST names the stream an output call is directed to. It is the
 * destination argument of the mcpp_fputc / mcpp_fputs / mcpp_fprintf style
 * output functions declared in mcpp_lib.h.
 */
typedef enum {
OUT, /* ~= fp_out */
ERR, /* ~= fp_err */
DBG, /* ~= fp_debug */
/* Not a destination: being last, its value is the number of destinations */
NUM_OUTDEST
} OUTDEST;
#endif /* _MCPP_OUT_H */

9
lib/mcpp/preproc.c Normal file
View File

@ -0,0 +1,9 @@
/* preproc.c: to "pre-preprocess" header files. */
#pragma MCPP preprocess
#include "system.H"
#include "internal.H"
#pragma MCPP put_defines

2811
lib/mcpp/support.c Normal file

File diff suppressed because it is too large Load Diff

396
lib/mcpp/system.H Normal file
View File

@ -0,0 +1,396 @@
/*-
* Copyright (c) 1998, 2002-2008 Kiyoshi Matsui <kmatsui@t3.rim.or.jp>
* All rights reserved.
*
* Some parts of this code are derived from the public domain software
* DECUS cpp (1984,1985) written by Martin Minow.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* S Y S T E M . H
* S y s t e m D e p e n d e n t
* D e f i n i t i o n s f o r M C P P
*
* Definitions in this file may be edited to configure MCPP for particular
* operating systems and compiler configurations.
*
* Note: MCPP assumes the system implements the ASCII character set.
* If this is not the case, you will have to do some editing here and there.
*/
#define SYSTEM_H
#if HAVE_CONFIG_H
#include "configed.H"
#else
#include "noconfig.H"
#endif
/* Some system has a wrong definition of UCHAR_MAX. See cpp-test.html#5.1.3 */
/*
 * How the test works: in a #if expression a signed and an unsigned operand
 * are compared as unsigned. If UCHARMAX expands to an unsigned constant,
 * -255 is converted to a huge unsigned value and the comparison is true; a
 * correct (signed int) definition can never be less than -255.
 * The replacement is a signed int expression. It needs CHAR_BIT, which is
 * presumably already available through the configuration header included
 * above -- confirm.
 */
#if UCHARMAX < -255
/* The definition of UCHARMAX (possibly UCHAR_MAX too) is wrong. */
/* Define it as a signed int value, not as an unsigned value. */
#undef UCHARMAX
#define UCHARMAX ((1 << CHAR_BIT) - 1)
#endif
/*
* PART 1 and PART 2 are defined in the above header files.
*/
/*
* P A R T 1 Configurations for target-operating-system
* and target-compiler.
*/
/*
* P A R T 2 Configurations for host-compiler.
*/
/*
* P A R T 3 Configurations for default settings, typedefs and
* translation limits.
*/
/*
* The variable 'mcpp_mode' specifies the mode of preprocessing as one of
* OLD_PREP, KR, STD, or POST_STD.
* Those modes differ from each other in many respects --
* i.e. handling of translation phases; handling of some
* preprocessing tokens; availability of some directives; way of
* macro expansion;
* -- as follows.
*
* KR Actual arguments of a macro are expanded (after substitution)
* with rest of the replacement text and the subsequent source text.
* ## in macro definition has no significance to cpp. The surrounding
* tokens are macro-expanded separately. Also, # has no significance
* to cpp. The following token is expanded.
* Directly or intermediately recursive macro call causes an error.
* <backslash><newline> sequence is deleted only in string literals
* and in #define directive lines.
* sizeof (type) can be used in #if line.
* KR corresponds to the "K&R 1st."
*
* OLD_PREP In addition to the KR specifications, this mode has the
* following characteristics (and some others).
* Converts comment to 0 space instead of 1 space.
* Expands the parameter like spelling in string literal as a macro.
* Does not check unmatched pair of '"' or '\''.
* OLD_PREP corresponds to "Reiser model" cpp.
*
* STD Standard conforming mode.
* <backslash><newline> sequence is always deleted after trigraph
* conversion and before tokenization.
* Digraph sequences are recognized as tokens.
* Actual arguments of a macro are expanded separately prior to
* re-scanning of the replacement text.
* The name in the replacement text once expanded is not re-expanded,
* thus preventing recursive death.
* ## in macro definition concatenates tokens. The tokens are not
* expanded. The concatenated token is expanded by rescanning.
* # in macro definition stringizes the following argument. The argument
* is not expanded. \ is inserted before " and \ in or surrounding
* the string literal or character constant.
* An expanded macro is surrounded by spaces to prevent unintended
* token merging.
*
* POST_STD This mode simplifies the behavior of STD mode as follows.
* 1. Digraph sequences are converted in translation phase 1, as
* alternate characters rather than as tokens.
* 2. A space is inserted as a token separator between any tokens in
* a source (except a macro name and the next '(' in macro
* definition): thus simplifying tokenization, test of macro
* redefinition and macro expansion, especially "stringization".
* 3. Test of macro redefinition ignores difference of parameter names,
* test of which has little utility and not a little overhead.
* 4. #if expression forbids character constants, which have little
* portability, little utility and not a little overhead.
* 5. Rescanning of a macro expansion is limited in the replacement
* text, rest of the source file is not scanned, thus making the
* syntax of "function-like" macro call more similar to that of
* function call.
* 6. Argument of #include directive in <header.h> form is an obsolescent
* feature.
* 7. '$' or so are not treated specially in #define directive.
* 8. Trigraphs, UCN (universal-character name) are not recognized.
* 9. Multi-byte characters in an identifier are not recognized.
*
* The following specifications are available when mode is STD or POST_STD.
* preprocessing number token, digraphs,
* #pragma (#pragma MCPP put_defines, #pragma MCPP warning
* , #pragma MCPP debug) directive,
* #error directive,
* #if defined operator, #elif directive,
* predefined macros __FILE__, __LINE__, __DATE__, __TIME__
* , __STDC__, __STDC_VERSION__, __STDC_HOSTED__,
* wide character constant, wide character string literal,
* _Pragma() operator, variable-arguments macro,
* macro as an argument of #include, #line directives,
* escape sequences \x[hex-digits], \a, \v,
* '+' option (C++ preprocessing),
* 'S<n>' option (re-defines __STDC__ as <n>, unpredefine some macros),
* 'V<n>' option (re-defines __STDC_VERSION__ or __cplusplus as <n>),
* 'h<n>' option (re-defines __STDC_HOSTED__ as <n>).
* The following specifications are available only in STD mode.
* Trigraphs and UCN,
* Multi-byte characters in an identifier.
* The following specifications are available only in KR and OLD_PREP modes.
* #assert, #asm, #endasm, #put_defines, #debug and some other older
* directives,
* argument of #line directive other than decimal-digits.
*/
/*
 * Values that the variable 'mcpp_mode' can take, one per preprocessing mode.
 */
#define OLD_PREP            1       /* "Reiser" cpp mode        */
#define KR                  2       /* K&R 1st mode             */
#define STD                 3       /* Standard mode            */
#define POST_STD            9       /* Special mode of MCPP     */
/*
* TRIGRAPHS_INIT Initial value for the -3 option. If TRUE -3
* disables trigraphs, if FALSE -3 enables them.
* DIGRAPHS_INIT Initial value for the -2 option. If TRUE -2
* disables digraphs, if FALSE -2 enables them.
* OK_UCN Enable recognition of Universal-Character-Name sequence
* by -V199901L option.
* OK_MBIDENT Enable multi-byte characters in identifier by -V199901L
* option.
* EXPAND_PRAGMA Enable macro expansion of #pragma line (even in modes
* other than C99).
* expr_t, uexpr_t Type of maximum integer:
* long long (unsigned long long) or longer.
* EXPR_MAX should be defined to the maximum value of uexpr_t.
*/
/* Initial states of the -3 (trigraphs) and -2 (digraphs) options. */
#define TRIGRAPHS_INIT      FALSE
#define DIGRAPHS_INIT       FALSE

/* Recognition of UCN / multi-byte identifiers under -V199901L. */
#define OK_UCN              TRUE
#define OK_MBIDENT          FALSE

/* Macro expansion of #pragma lines; may be preset before this point. */
#if !defined(EXPAND_PRAGMA)
#define EXPAND_PRAGMA       FALSE
#endif
/*
 * expr_t / uexpr_t: the signed / unsigned integer types used as the
 * "maximum integer" type.  Selection order:
 *   1. intmax_t / uintmax_t        when HAVE_INTMAX_T,
 *   2. long / unsigned long        when there is no long long either,
 *   3. __int64 / unsigned __int64  for MSC older than 1500 and for BORLANDC,
 *   4. long long / unsigned long long otherwise.
 */
#if HAVE_INTMAX_T
/* intmax_t comes from whichever of these two headers is available. */
#if HAVE_STDINT_H
#include "stdint.h"
#elif HAVE_INTTYPES_H
#include "inttypes.h"
#endif
typedef intmax_t            expr_t;
typedef uintmax_t           uexpr_t;
#elif !HAVE_LONG_LONG
typedef long                expr_t;
typedef unsigned long       uexpr_t;
#elif (HOST_COMPILER == MSC && _MSC_VER < 1500) || HOST_COMPILER == BORLANDC
typedef __int64             expr_t;
typedef unsigned __int64    uexpr_t;
#else
typedef long long           expr_t;
typedef unsigned long long  uexpr_t;
#endif
/*
 * EXPR_MAX: the maximum value of uexpr_t.  Each branch mirrors the branch
 * that selects the uexpr_t typedef.
 */
#if HAVE_INTMAX_T
#define EXPR_MAX            UINTMAX_MAX
#elif HAVE_LONG_LONG
#if (HOST_COMPILER == MSC && _MSC_VER < 1400) || HOST_COMPILER == BORLANDC
/* Older MSC and BORLANDC spell the 64-bit unsigned suffix as 'ui64'. */
#define EXPR_MAX            0xFFFFFFFFFFFFFFFFui64
#else
#define EXPR_MAX            0xFFFFFFFFFFFFFFFFULL
#endif
#else
/*
 * uexpr_t is 'unsigned long' here.  Its width is not necessarily 32 bits
 * (it is 64 bits on LP64 hosts), so derive the maximum from the type itself
 * instead of hard-coding 4294967295UL.
 */
#define EXPR_MAX            (~0UL)
#endif
/*
* Translation limits.
* The following definitions are used to allocate memory for work buffers.
*
* NWORK Output buffer size. Set this size according to your compiler-
* proper. Length of string literal should be less than NWORK
* - 1.
* Nevertheless, when COMPILER == GNUC || COMPILER == MSC, mcpp
* uses NMACWORK as output buffer size because GNUC and Visual C
* can accept very long line.
* NBUFF Input buffer size after line concatenation by <backslash>
* <newline>.
* NMACWORK Internal work buffer size for macro definition and expansion.
* IDMAX The longest identifier length.
* NMACPARS The maximum number of #define parameters.
* NOTE: Must be NMACPARS <= UCHARMAX.
* NEXP The maximum nesting depth of #if expressions.
* BLK_NEST The number of nested #if's permitted.
* INCLUDE_NEST The maximum nesting depth of #include. This is needed to
* prevent infinite recursive inclusion.
* RESCAN_LIMIT The maximum rescan times of macro expansion in STD or POST_STD
* modes.
* PRESTD_RESCAN_LIMIT The maximum rescan times of macro expansion in KR or
* OLD_PREP modes.
*
* NBUFF should not be smaller than NWORK.
* NMACWORK should not be smaller than NWORK * 2.
*
* SBSIZE defines the number of hash-table slots for the macro symbol table.
* It must be a power of 2.
*
* MKDEP_INIT The initial maximum number of filenames in a dependency line
* of output of -M* option. The maximum number is dynamically
* enlarged in execution.
*/
/*
 * Default translation limits.  Every value can be preset before this point
 * (the defaults apply only to names that are still undefined).
 */

/* Longest identifier length. */
#if !defined(IDMAX)
#define IDMAX               0x400
#endif

/* Maximum number of #define parameters; must not exceed UCHARMAX. */
#if !defined(NMACPARS)
#define NMACPARS            0xFF
#endif

/* Maximum nesting depth of #if expressions. */
#if !defined(NEXP)
#define NEXP                0x100
#endif

/* Number of nested #if's permitted. */
#if !defined(BLK_NEST)
#define BLK_NEST            0x100
#endif

/* Maximum nesting depth of #include. */
#if !defined(INCLUDE_NEST)
#define INCLUDE_NEST        0x100
#endif

/* Maximum rescan times of macro expansion in STD or POST_STD modes. */
#if !defined(RESCAN_LIMIT)
#define RESCAN_LIMIT        0x40
#endif

/* Maximum rescan times of macro expansion in KR or OLD_PREP modes. */
#if !defined(PRESTD_RESCAN_LIMIT)
#define PRESTD_RESCAN_LIMIT 0x100
#endif

/* Input buffer size after line concatenation.  Must be NWORK <= NBUFF. */
#if !defined(NBUFF)
#define NBUFF               0x10000
#endif

/* Output buffer size (0x1000, 0x4000, 0x10000, ..). */
#if !defined(NWORK)
#define NWORK               NBUFF
#endif

/* Work buffer size for macros.  Must be NWORK * 2 <= NMACWORK. */
#if !defined(NMACWORK)
#define NMACWORK            (NWORK * 4)
#endif

/* Number of hash-table slots of the macro symbol table (a power of 2). */
#if !defined(SBSIZE)
#define SBSIZE              0x400
#endif

/* Initial maximum number of filenames in a dependency line (-M* options). */
#if !defined(MKDEP_INIT)
#define MKDEP_INIT          0x100
#endif
/* Compile-time consistency checks of the translation limits. */
#if NMACPARS > UCHARMAX
#error "NMACPARS should not be greater than UCHARMAX"
#endif

#if NWORK > NBUFF
#error "NBUFF must be same or greater than NWORK"
#endif

#if NWORK * 2 > NMACWORK
#error "NMACWORK must be same or greater than NWORK * 2"
#endif

/* Bit mask matching the SBSIZE hash-table slots. */
#define SBMASK              (SBSIZE - 1)

/*
 * For a power of 2, SBSIZE and SBSIZE - 1 share no bits, so their XOR is
 * exactly SBSIZE * 2 - 1; any other result means SBSIZE is not one.
 */
#if ((SBSIZE * 2) - 1) != (SBSIZE ^ SBMASK)
#error "SBSIZE must be a power of 2 !"
#endif
/*
* Translation limits required by the Standard.
*
* *90MIN limits specified by C90.
* *99MIN limits specified by C99.
* *_CPLUS_MIN limits recommended by C++ (ISO 1998/07 Standard).
*
* SLEN*MIN Characters in a logical source line
* and characters in a string literal or wide string literal
* (after concatenation).
* IDLEN*MIN Significant initial characters in an internal identifier
* or a macro name.
* NMACPARS*MIN Parameters in one macro definition.
* Arguments in one macro invocation.
* EXP_NEST*MIN Nesting levels of parenthesized expressions in a full
* expression.
* BLK_NEST*MIN Nesting levels of conditional inclusion.
* INCLUDE_NEST*MIN Nesting levels for #include files.
* NMACRO*MIN Macro identifiers simultaneously defined in one translation
* unit.
*/
/* Minimum translation limits specified by C90. */
#define SLEN90MIN               0x1FD
#define IDLEN90MIN              0x1F
#define NMACPARS90MIN           0x1F
#define EXP_NEST90MIN           0x20
#define BLK_NEST90MIN           8
#define INCLUDE_NEST90MIN       8
#define NMACRO90MIN             0x400

/* Minimum translation limits specified by C99. */
#define SLEN99MIN               0xFFF
#define IDLEN99MIN              0x3F
#define NMACPARS99MIN           0x7F
#define EXP_NEST99MIN           0x3F
#define BLK_NEST99MIN           0x3F
#define INCLUDE_NEST99MIN       0xF
#define NMACRO99MIN             0xFFF

/* Translation limits recommended by C++ (ISO 1998/07 Standard). */
#define SLEN_CPLUS_MIN          0x10000
#define IDLEN_CPLUS_MIN         0x400
#define NMACPARS_CPLUS_MIN      0x100
#define EXP_NEST_CPLUS_MIN      0x100
#define BLK_NEST_CPLUS_MIN      0x100
#define INCLUDE_NEST_CPLUS_MIN  0x100
#define NMACRO_CPLUS_MIN        0x10000

/* The line number limit of C99. */
#define LINE99LIMIT             0x7FFFFFFF
/*
* STDC This macro is used for the predefined __STDC__.
* STDC_VERSION is used for the value of __STDC_VERSION__.
* STDC_HOSTED is used for the value of __STDC_HOSTED__.
*/
/*
 * If any configured translation limit is below the C90 minimum, the
 * implementation cannot claim conformance, so __STDC__ is predefined as 0.
 * The condition spans three physical lines; each continued line must end
 * with a backslash so that the whole expression belongs to the #if.
 */
#if IDMAX < IDLEN90MIN || NBUFF < SLEN90MIN + 3 \
        || NWORK < SLEN90MIN + 2 || NMACPARS < NMACPARS90MIN \
        || NEXP < EXP_NEST90MIN || BLK_NEST < BLK_NEST90MIN
#define STDC                0
#endif
/* Default, used when STDC was neither preset nor forced to 0 above. */
#ifndef STDC
#define STDC                1       /* 1 : for ISO 9899:1990 or later */
#endif
/*
 * Default for __STDC_VERSION__ unless preset:
 *   199409L : For conforming implementation to
 *             ISO 9899:1990 / Amendment 1:1995
 *   199901L : For C99
 */
#if !defined(STDC_VERSION)
#define STDC_VERSION        0L
#endif

/*
 * Default for __STDC_HOSTED__ unless preset (C99 specification):
 *   1 : for hosted implementation,
 *   0 : for free-standing implementation
 */
#if !defined(STDC_HOSTED)
#define STDC_HOSTED         1
#endif
/*
 * CPLUS is the default value of the pre-defined macro __cplusplus used for
 * C++ processing (199711L would be the value for the C++ Standard).
 * The -V<n> option can change the value.
 */
#define CPLUS               1

4940
lib/mcpp/system.c Normal file

File diff suppressed because it is too large Load Diff