Review macros for character classification.

2025-07-24 10:25:30 -04:00 · 2019-07-25 19:13:40 +08:00 · 2019-07-25 19:13:40 +08:00 · 9c1ec4fdf8
commit 9c1ec4fdf8
parent e11ed9187f
6 changed files with 80 additions and 136 deletions
--- a/8
+++ b/8
@ -161,7 +161,7 @@ file.o: file.c file.h buffer.h line.h retcode.h utf8.h defines.h \
 fileio.o: fileio.c fileio.h defines.h retcode.h utf8.h
 flook.o: flook.c flook.h retcode.h defines.h fileio.h
 input.o: input.c input.h bind.h estruct.h bindable.h display.h utf8.h \
- exec.h retcode.h names.h terminal.h defines.h wrapper.h
+ exec.h retcode.h isa.h names.h terminal.h defines.h wrapper.h
 isearch.o: isearch.c isearch.h basic.h retcode.h buffer.h line.h utf8.h \
 display.h estruct.h exec.h input.h bind.h search.h terminal.h defines.h \
 util.h window.h
@ -184,8 +184,8 @@ random.o: random.c random.h retcode.h basic.h buffer.h line.h utf8.h \
 region.o: region.c region.h line.h retcode.h utf8.h buffer.h estruct.h \
 mlout.h random.h window.h defines.h
 search.o: search.c search.h line.h retcode.h utf8.h basic.h buffer.h \
- display.h estruct.h input.h bind.h mlout.h terminal.h defines.h util.h \
- window.h
+ display.h estruct.h input.h bind.h isa.h mlout.h terminal.h defines.h \
+ util.h window.h
 spawn.o: spawn.c spawn.h defines.h buffer.h line.h retcode.h utf8.h \
 display.h estruct.h exec.h file.h flook.h input.h bind.h terminal.h \
 window.h
@ -197,7 +197,7 @@ util.o: util.c util.h
 window.o: window.c window.h defines.h buffer.h line.h retcode.h utf8.h \
 basic.h display.h estruct.h execute.h terminal.h wrapper.h
 word.o: word.c word.h basic.h retcode.h buffer.h line.h utf8.h estruct.h \
- mlout.h random.h region.h window.h defines.h
+ isa.h mlout.h random.h region.h window.h defines.h
 wrapper.o: wrapper.c wrapper.h
 wscreen.o: wscreen.c wscreen.h

--- a/estruct.h
+++ b/estruct.h
@ -124,23 +124,13 @@

 #define	CLEAN	0  /* de-alloc memory on exit                      */

-#define ASCII	1  /* always using ASCII char sequences for now    */
-#define EBCDIC	0  /* later IBM mainfraim versions will use EBCDIC */
-
 #ifndef	AUTOCONF
-
-#define	XONXOFF	0  /* don't disable XON-XOFF flow control P.K.     */
-#define	NATIONL	0  /* interprete [,],\,{,},| as characters P.K.    */
-
+# define XONXOFF	0  /* don't disable XON-XOFF flow control P.K.     */
 #else
-
-#define	XONXOFF	UNIX
-#define	NATIONL	UNIX
-
+# define XONXOFF	UNIX
 #endif /* Autoconf. */

 #define	PKCODE	1      /* include my extensions P.K., define always    */
-#define	IBMCHR	MSDOS  /* use IBM PC character set P.K.                */
 #define SCROLLCODE 1   /* scrolling code P.K.                          */

 /* Define some ability flags. */
@ -157,65 +147,6 @@
 # define ENVFUNC	0
 #endif

-/* DIFCASE represents the integer difference between upper
-   and lower case letters.  It is an xor-able value, which is
-   fortunate, since the relative positions of upper to lower
-   case letters is the opposite of ascii in ebcdic.
-*/
-
-#ifdef	islower
-#undef	islower
-#endif
-
-#if	PKCODE
-#ifdef	isupper
-#undef	isupper
-#endif
-#endif
-
-#if	ASCII
-
-#define	DIFCASE		0x20
-
-#if	NATIONL
-#define LASTUL ']'
-#define LASTLL '}'
-#else
-#define LASTUL 'Z'
-#define LASTLL 'z'
-#endif
-
-#if	IBMCHR
-
-#define isletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (128<=c && c<=167))
-#define islower(c)	(('a' <= c && LASTLL >= c))
-#define isupper(c)	(('A' <= c && LASTUL >= c))
-
-#else
-
-#define isletter(c)	__isxletter((0xFF & (c)))
-#define islower(c)	isxlower((0xFF & (c)))
-#define isupper(c)	isxupper((0xFF & (c)))
-
-#define __isxletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (192<=c /* && c<=255 */))
-#define isxlower(c)	(('a' <= c && LASTLL >= c) || (224 <= c && 252 >= c))
-#define isxupper(c)	(('A' <= c && LASTUL >= c) || (192 <= c && 220 >= c))
-
-#endif
-
-#endif
-
-#if	EBCDIC
-
-#define	DIFCASE		0x40
-#define isletter(c)	(('a' <= c && 'i' >= c) || ('j' <= c && 'r' >= c) || ('s' <= c && 'z' >= c) || ('A' <= c && 'I' >= c) || ('J' <= c && 'R' >= c) || ('S' <= c && 'Z' >= c))
-#define islower(c)	(('a' <= c && 'i' >= c) || ('j' <= c && 'r' >= c) || ('s' <= c && 'z' >= c))
-#if	PKCODE
-#define isupper(c)	(('A' <= c && 'I' >= c) || ('J' <= c && 'R' >= c) || ('S' <= c && 'Z' >= c))
-#endif
-
-#endif
-
 /*	Dynamic RAM tracking and reporting redefinitions	*/

 #if	RAMSIZE
--- a/input.c
+++ b/input.c
@ -19,6 +19,7 @@
 #include "bindable.h"
 #include "display.h"
 #include "exec.h"
+#include "isa.h"
 #include "names.h"
 #include "terminal.h"
 #include "utf8.h"
@ -427,9 +428,9 @@ handle_CSI:
            goto proc_metac;
        }
 #endif
-        if (islower(c)) /* Force to upper */
-            c ^= DIFCASE;
-        if (c >= 0x00 && c <= 0x1F) /* control key */
+        if( islower( c)) /* Force to upper */
+            c = flipcase( c) ;
+        else if( c >= 0x00 && c <= 0x1F) /* control key */
            c = CONTROL | (c + '@');
        return META | c;
    }
@ -442,9 +443,9 @@ handle_CSI:
            goto proc_metac;
        }
 #endif
-        if (islower(c)) /* Force to upper */
-            c ^= DIFCASE;
-        if (c >= 0x00 && c <= 0x1F) /* control key */
+        if( islower( c)) /* Force to upper */
+            c = flipcase( c) ;
+        else if( c >= 0x00 && c <= 0x1F) /* control key */
            c = CONTROL | (c + '@');
        return META | c;
    }
--- a/isa.h
+++ b/isa.h
@ -0,0 +1,44 @@
+/* isa.h -- isletter, islower, isupper, flipcase */
+
+#ifndef __ISA_H__
+#define __ISA_H__
+
+#define	NATIONL	0	/* if 1, interpret [,],\,{,},| as characters P.K.    */
+
+
+#ifdef	islower
+#undef	islower
+#endif
+
+#ifdef	isupper
+#undef	isupper
+#endif
+
+#if	NATIONL
+#define LASTUL ']'
+#define LASTLL '}'
+#else
+#define LASTUL 'Z'
+#define LASTLL 'z'
+#endif
+
+#define isletter(c)	__isxletter((0xFF & (c)))
+#define islower(c)	isxlower((0xFF & (c)))
+#define isupper(c)	isxupper((0xFF & (c)))
+
+#define __isxletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (192<=c /* && c<=255 */))
+#define isxlower(c)	(('a' <= c && LASTLL >= c) || (224 <= c && 252 >= c))
+#define isxupper(c)	(('A' <= c && LASTUL >= c) || (192 <= c && 220 >= c))
+
+/* DIFCASE represents the integer difference between upper and lower
+   case letters. It is an xor-able value, which is fortunate, since the
+   relative positions of upper to lower case letters is the opposite of
+   ascii in ebcdic.
+*/
+
+#define	DIFCASE	0x20					/* ASCII 'a' - 'A' */
+#define flipcase( c) ((c) ^ DIFCASE)	/* Toggle the case of a letter. */
+
+#endif /* __ISA_H__ */
+
+/* end of isa.h */
--- a/search.c
+++ b/search.c
@ -69,6 +69,7 @@
 #include "display.h"
 #include "estruct.h"
 #include "input.h"
+#include "isa.h"
 #include "line.h"
 #include "mlout.h"
 #include "terminal.h"
@ -124,7 +125,6 @@ spat_t rpat ;	/* replacement pattern          */
 #define	MC_ESC		'\\'	/* Escape - suppress meta-meaning. */

 #define	BIT(n)		(1 << (n))	/* An integer with one bit set. */
-#define	CHCASE(c)	((c) ^ DIFCASE)	/* Toggle the case of a letter. */

 /* HICHAR - 1 is the largest character we will deal with.
 * HIBYTE represents the number of bytes in the bitmap.
@ -673,11 +673,11 @@ int scanner(const char *patrn, int direct, int beg_or_end)
 int eq(unsigned char bc, unsigned char pc)
 {
 	if ((curwp->w_bufp->b_mode & MDEXACT) == 0) {
-		if (islower(bc))
-			bc ^= DIFCASE;
+		if( islower( bc))
+			bc = flipcase( bc) ;

-		if (islower(pc))
-			pc ^= DIFCASE;
+		if( islower( pc))
+			pc = flipcase( pc) ;
 	}

 	return bc == pc;
@ -1455,7 +1455,7 @@ static int mceq(int bc, struct magic *mt)
 		if (!(result = biteq(bc, mt->u.cclmap))) {
 			if ((curwp->w_bufp->b_mode & MDEXACT) == 0 &&
 			    (isletter(bc))) {
-				result = biteq(CHCASE(bc), mt->u.cclmap);
+				result = biteq( flipcase( bc), mt->u.cclmap) ;
 			}
 		}
 		break;
@ -1465,7 +1465,7 @@ static int mceq(int bc, struct magic *mt)

 		if ((curwp->w_bufp->b_mode & MDEXACT) == 0 &&
 		    (isletter(bc))) {
-			result &= !biteq(CHCASE(bc), mt->u.cclmap);
+			result &= !biteq( flipcase( bc), mt->u.cclmap) ;
 		}
 		break;

--- a/word.c
+++ b/word.c
@ -15,6 +15,7 @@
 #include "basic.h"
 #include "buffer.h"
 #include "estruct.h"
+#include "isa.h"
 #include "line.h"
 #include "mlout.h"
 #include "random.h"
@ -145,12 +146,8 @@ int upperword(int f, int n)
 		}
 		while (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (islower(c)) {
-#else
-			if (c >= 'a' && c <= 'z') {
-#endif
-				c -= 'a' - 'A';
+			if( islower( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@ -181,12 +178,8 @@ int lowerword(int f, int n)
 		}
 		while (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (isupper(c)) {
-#else
-			if (c >= 'A' && c <= 'Z') {
-#endif
-				c += 'a' - 'A';
+			if( isupper( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@ -218,12 +211,8 @@ int capword(int f, int n)
 		}
 		if (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (islower(c)) {
-#else
-			if (c >= 'a' && c <= 'z') {
-#endif
-				c -= 'a' - 'A';
+			if( islower( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@ -231,12 +220,8 @@ int capword(int f, int n)
 				return FALSE;
 			while (inword() != FALSE) {
 				c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-				if (isupper(c)) {
-#else
-				if (c >= 'A' && c <= 'Z') {
-#endif
-					c += 'a' - 'A';
+				if( isupper( c)) {
+					c = flipcase( c) ;
 					lputc(curwp->w_dotp, curwp->w_doto,
 					      c);
 					lchange(WFHARD);
@ -385,24 +370,14 @@ int delbword(int f, int n)
 * Return TRUE if the character at dot is a character that is considered to be
 * part of a word. The word character list is hard coded. Should be setable.
 */
-static int inword(void)
-{
+static int inword( void) {
 	int c;

-	if (curwp->w_doto == llength(curwp->w_dotp))
-		return FALSE;
-	c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-	if (isletter(c))
-#else
-	if (c >= 'a' && c <= 'z')
-		return TRUE;
-	if (c >= 'A' && c <= 'Z')
-#endif
-		return TRUE;
-	if (c >= '0' && c <= '9')
-		return TRUE;
-	return FALSE;
+	if( curwp->w_doto == llength( curwp->w_dotp))
+		return FALSE ;
+
+	c = lgetc( curwp->w_dotp, curwp->w_doto) ;
+	return isletter( c) || ( c >= '0' && c <= '9') ;
 }

 #if	WORDPRO
@ -699,14 +674,7 @@ int wordcount(int f, int n)
 		}

 		/* and tabulate it */
-		wordflag = (
-#if	PKCODE
-				   (isletter(ch)) ||
-#else
-				   (ch >= 'a' && ch <= 'z') ||
-				   (ch >= 'A' && ch <= 'Z') ||
-#endif
-				   (ch >= '0' && ch <= '9'));
+		wordflag = isletter( ch) || (ch >= '0' && ch <= '9') ;
 		if (wordflag == TRUE && lastword == FALSE)
 			++nwords;
 		lastword = wordflag;