From 9c1ec4fdf81efeace36888c49a621077ed5b0e0a Mon Sep 17 00:00:00 2001
From: Renaud Fivet <renaud.fivet@gmail.com>
Date: Thu, 25 Jul 2019 19:13:40 +0800
Subject: [PATCH] Review macros for character classification.

---
 Makefile  |  8 +++---
 estruct.h | 73 ++-----------------------------------------------------
 input.c   | 13 +++++-----
 isa.h     | 44 +++++++++++++++++++++++++++++++++
 search.c  | 14 +++++------
 word.c    | 64 ++++++++++++------------------------------------
 6 files changed, 80 insertions(+), 136 deletions(-)
 create mode 100644 isa.h

diff --git a/Makefile b/Makefile
index ee98eaa..1c3da8d 100644
--- a/Makefile
+++ b/Makefile
@@ -161,7 +161,7 @@ file.o: file.c file.h buffer.h line.h retcode.h utf8.h defines.h \
 fileio.o: fileio.c fileio.h defines.h retcode.h utf8.h
 flook.o: flook.c flook.h retcode.h defines.h fileio.h
 input.o: input.c input.h bind.h estruct.h bindable.h display.h utf8.h \
- exec.h retcode.h names.h terminal.h defines.h wrapper.h
+ exec.h retcode.h isa.h names.h terminal.h defines.h wrapper.h
 isearch.o: isearch.c isearch.h basic.h retcode.h buffer.h line.h utf8.h \
  display.h estruct.h exec.h input.h bind.h search.h terminal.h defines.h \
  util.h window.h
@@ -184,8 +184,8 @@ random.o: random.c random.h retcode.h basic.h buffer.h line.h utf8.h \
 region.o: region.c region.h line.h retcode.h utf8.h buffer.h estruct.h \
  mlout.h random.h window.h defines.h
 search.o: search.c search.h line.h retcode.h utf8.h basic.h buffer.h \
- display.h estruct.h input.h bind.h mlout.h terminal.h defines.h util.h \
- window.h
+ display.h estruct.h input.h bind.h isa.h mlout.h terminal.h defines.h \
+ util.h window.h
 spawn.o: spawn.c spawn.h defines.h buffer.h line.h retcode.h utf8.h \
  display.h estruct.h exec.h file.h flook.h input.h bind.h terminal.h \
  window.h
@@ -197,7 +197,7 @@ util.o: util.c util.h
 window.o: window.c window.h defines.h buffer.h line.h retcode.h utf8.h \
  basic.h display.h estruct.h execute.h terminal.h wrapper.h
 word.o: word.c word.h basic.h retcode.h buffer.h line.h utf8.h estruct.h \
- mlout.h random.h region.h window.h defines.h
+ isa.h mlout.h random.h region.h window.h defines.h
 wrapper.o: wrapper.c wrapper.h
 wscreen.o: wscreen.c wscreen.h
 
diff --git a/estruct.h b/estruct.h
index ed95fd7..4baa6eb 100644
--- a/estruct.h
+++ b/estruct.h
@@ -124,23 +124,13 @@
 
 #define	CLEAN	0  /* de-alloc memory on exit                      */
 
-#define ASCII	1  /* always using ASCII char sequences for now    */
-#define EBCDIC	0  /* later IBM mainfraim versions will use EBCDIC */
-
 #ifndef	AUTOCONF
-
-#define	XONXOFF	0  /* don't disable XON-XOFF flow control P.K.     */
-#define	NATIONL	0  /* interprete [,],\,{,},| as characters P.K.    */
-
+# define XONXOFF	0  /* don't disable XON-XOFF flow control P.K.     */
 #else
-
-#define	XONXOFF	UNIX
-#define	NATIONL	UNIX
-
+# define XONXOFF	UNIX
 #endif /* Autoconf. */
 
 #define	PKCODE	1      /* include my extensions P.K., define always    */
-#define	IBMCHR	MSDOS  /* use IBM PC character set P.K.                */
 #define SCROLLCODE 1   /* scrolling code P.K.                          */
 
 /* Define some ability flags. */
@@ -157,65 +147,6 @@
 # define ENVFUNC	0
 #endif
 
-/* DIFCASE represents the integer difference between upper
-   and lower case letters.  It is an xor-able value, which is
-   fortunate, since the relative positions of upper to lower
-   case letters is the opposite of ascii in ebcdic.
-*/
-
-#ifdef	islower
-#undef	islower
-#endif
-
-#if	PKCODE
-#ifdef	isupper
-#undef	isupper
-#endif
-#endif
-
-#if	ASCII
-
-#define	DIFCASE		0x20
-
-#if	NATIONL
-#define LASTUL ']'
-#define LASTLL '}'
-#else
-#define LASTUL 'Z'
-#define LASTLL 'z'
-#endif
-
-#if	IBMCHR
-
-#define isletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (128<=c && c<=167))
-#define islower(c)	(('a' <= c && LASTLL >= c))
-#define isupper(c)	(('A' <= c && LASTUL >= c))
-
-#else
-
-#define isletter(c)	__isxletter((0xFF & (c)))
-#define islower(c)	isxlower((0xFF & (c)))
-#define isupper(c)	isxupper((0xFF & (c)))
-
-#define __isxletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (192<=c /* && c<=255 */))
-#define isxlower(c)	(('a' <= c && LASTLL >= c) || (224 <= c && 252 >= c))
-#define isxupper(c)	(('A' <= c && LASTUL >= c) || (192 <= c && 220 >= c))
-
-#endif
-
-#endif
-
-#if	EBCDIC
-
-#define	DIFCASE		0x40
-#define isletter(c)	(('a' <= c && 'i' >= c) || ('j' <= c && 'r' >= c) || ('s' <= c && 'z' >= c) || ('A' <= c && 'I' >= c) || ('J' <= c && 'R' >= c) || ('S' <= c && 'Z' >= c))
-#define islower(c)	(('a' <= c && 'i' >= c) || ('j' <= c && 'r' >= c) || ('s' <= c && 'z' >= c))
-#if	PKCODE
-#define isupper(c)	(('A' <= c && 'I' >= c) || ('J' <= c && 'R' >= c) || ('S' <= c && 'Z' >= c))
-#endif
-
-#endif
-
 /*	Dynamic RAM tracking and reporting redefinitions	*/
 
 #if	RAMSIZE
diff --git a/input.c b/input.c
index c368406..ccaddc4 100644
--- a/input.c
+++ b/input.c
@@ -19,6 +19,7 @@
 #include "bindable.h"
 #include "display.h"
 #include "exec.h"
+#include "isa.h"
 #include "names.h"
 #include "terminal.h"
 #include "utf8.h"
@@ -427,9 +428,9 @@ handle_CSI:
             goto proc_metac;
         }
 #endif
-        if (islower(c)) /* Force to upper */
-            c ^= DIFCASE;
-        if (c >= 0x00 && c <= 0x1F) /* control key */
+        if( islower( c)) /* Force to upper */
+            c = flipcase( c) ;
+        else if( c >= 0x00 && c <= 0x1F) /* control key */
             c = CONTROL | (c + '@');
         return META | c;
     }
@@ -442,9 +443,9 @@ handle_CSI:
             goto proc_metac;
         }
 #endif
-        if (islower(c)) /* Force to upper */
-            c ^= DIFCASE;
-        if (c >= 0x00 && c <= 0x1F) /* control key */
+        if( islower( c)) /* Force to upper */
+            c = flipcase( c) ;
+        else if( c >= 0x00 && c <= 0x1F) /* control key */
             c = CONTROL | (c + '@');
         return META | c;
     }
diff --git a/isa.h b/isa.h
new file mode 100644
index 0000000..3f1ca06
--- /dev/null
+++ b/isa.h
@@ -0,0 +1,44 @@
+/* isa.h -- character classification (isletter, islower, isupper) and flipcase */
+/* NOTE(review): names containing "__" (__ISA_H__, __isxletter) are reserved for the implementation in C. */
+#ifndef __ISA_H__
+#define __ISA_H__
+
+#define	NATIONL	0	/* if 1, interpret [,],\,{,},| as characters P.K.    */
+
+/* Discard any islower/isupper macros already defined; they are redefined below. */
+#ifdef	islower
+#undef	islower
+#endif
+
+#ifdef	isupper
+#undef	isupper
+#endif
+/* LASTUL/LASTLL: upper bound of the upper-/lower-case range tested from 'A'/'a'. */
+#if	NATIONL
+#define LASTUL ']'
+#define LASTLL '}'
+#else
+#define LASTUL 'Z'
+#define LASTLL 'z'
+#endif
+/* Public tests: mask the argument to its low 8 bits; the argument is expanded several times, so avoid side effects. */
+#define isletter(c)	__isxletter((0xFF & (c)))
+#define islower(c)	isxlower((0xFF & (c)))
+#define isupper(c)	isxupper((0xFF & (c)))
+/* Range helpers: c is used unparenthesized, so pass a parenthesized value as above. 192 and up presumably covers ISO 8859-1 letters -- TODO confirm. */
+#define __isxletter(c)	(('a' <= c && LASTLL >= c) || ('A' <= c && LASTUL >= c) || (192<=c /* && c<=255 */))
+#define isxlower(c)	(('a' <= c && LASTLL >= c) || (224 <= c && 252 >= c))
+#define isxupper(c)	(('A' <= c && LASTUL >= c) || (192 <= c && 220 >= c))
+
+/* DIFCASE is the integer difference between upper and lower case
+   letters. It is an xor-able value: flipcase() toggles that single bit
+   and does not check that its argument is a letter, so test it with
+   islower()/isupper()/isletter() first. Only the ASCII value is defined
+   here; EBCDIC would need a different one. */
+
+#define	DIFCASE	0x20					/* ASCII 'a' - 'A' */
+#define flipcase( c) ((c) ^ DIFCASE)	/* Toggle the case of a letter. */
+
+#endif /* __ISA_H__ */
+
+/* end of isa.h */
diff --git a/search.c b/search.c
index 38498ca..02da164 100644
--- a/search.c
+++ b/search.c
@@ -69,6 +69,7 @@
 #include "display.h"
 #include "estruct.h"
 #include "input.h"
+#include "isa.h"
 #include "line.h"
 #include "mlout.h"
 #include "terminal.h"
@@ -124,7 +125,6 @@ spat_t rpat ;	/* replacement pattern          */
 #define	MC_ESC		'\\'	/* Escape - suppress meta-meaning. */
 
 #define	BIT(n)		(1 << (n))	/* An integer with one bit set. */
-#define	CHCASE(c)	((c) ^ DIFCASE)	/* Toggle the case of a letter. */
 
 /* HICHAR - 1 is the largest character we will deal with.
  * HIBYTE represents the number of bytes in the bitmap.
@@ -673,11 +673,11 @@ int scanner(const char *patrn, int direct, int beg_or_end)
 int eq(unsigned char bc, unsigned char pc)
 {
 	if ((curwp->w_bufp->b_mode & MDEXACT) == 0) {
-		if (islower(bc))
-			bc ^= DIFCASE;
+		if( islower( bc))
+			bc = flipcase( bc) ;
 
-		if (islower(pc))
-			pc ^= DIFCASE;
+		if( islower( pc))
+			pc = flipcase( pc) ;
 	}
 
 	return bc == pc;
@@ -1455,7 +1455,7 @@ static int mceq(int bc, struct magic *mt)
 		if (!(result = biteq(bc, mt->u.cclmap))) {
 			if ((curwp->w_bufp->b_mode & MDEXACT) == 0 &&
 			    (isletter(bc))) {
-				result = biteq(CHCASE(bc), mt->u.cclmap);
+				result = biteq( flipcase( bc), mt->u.cclmap) ;
 			}
 		}
 		break;
@@ -1465,7 +1465,7 @@ static int mceq(int bc, struct magic *mt)
 
 		if ((curwp->w_bufp->b_mode & MDEXACT) == 0 &&
 		    (isletter(bc))) {
-			result &= !biteq(CHCASE(bc), mt->u.cclmap);
+			result &= !biteq( flipcase( bc), mt->u.cclmap) ;
 		}
 		break;
 
diff --git a/word.c b/word.c
index 1b1360b..f49b3fb 100644
--- a/word.c
+++ b/word.c
@@ -15,6 +15,7 @@
 #include "basic.h"
 #include "buffer.h"
 #include "estruct.h"
+#include "isa.h"
 #include "line.h"
 #include "mlout.h"
 #include "random.h"
@@ -145,12 +146,8 @@ int upperword(int f, int n)
 		}
 		while (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (islower(c)) {
-#else
-			if (c >= 'a' && c <= 'z') {
-#endif
-				c -= 'a' - 'A';
+			if( islower( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@@ -181,12 +178,8 @@ int lowerword(int f, int n)
 		}
 		while (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (isupper(c)) {
-#else
-			if (c >= 'A' && c <= 'Z') {
-#endif
-				c += 'a' - 'A';
+			if( isupper( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@@ -218,12 +211,8 @@ int capword(int f, int n)
 		}
 		if (inword() != FALSE) {
 			c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-			if (islower(c)) {
-#else
-			if (c >= 'a' && c <= 'z') {
-#endif
-				c -= 'a' - 'A';
+			if( islower( c)) {
+				c = flipcase( c) ;
 				lputc(curwp->w_dotp, curwp->w_doto, c);
 				lchange(WFHARD);
 			}
@@ -231,12 +220,8 @@ int capword(int f, int n)
 				return FALSE;
 			while (inword() != FALSE) {
 				c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-				if (isupper(c)) {
-#else
-				if (c >= 'A' && c <= 'Z') {
-#endif
-					c += 'a' - 'A';
+				if( isupper( c)) {
+					c = flipcase( c) ;
 					lputc(curwp->w_dotp, curwp->w_doto,
 					      c);
 					lchange(WFHARD);
@@ -385,24 +370,14 @@ int delbword(int f, int n)
  * Return TRUE if the character at dot is a character that is considered to be
  * part of a word. The word character list is hard coded. Should be setable.
  */
-static int inword(void)
-{
+static int inword( void) {
 	int c;
 
-	if (curwp->w_doto == llength(curwp->w_dotp))
-		return FALSE;
-	c = lgetc(curwp->w_dotp, curwp->w_doto);
-#if	PKCODE
-	if (isletter(c))
-#else
-	if (c >= 'a' && c <= 'z')
-		return TRUE;
-	if (c >= 'A' && c <= 'Z')
-#endif
-		return TRUE;
-	if (c >= '0' && c <= '9')
-		return TRUE;
-	return FALSE;
+	if( curwp->w_doto == llength( curwp->w_dotp))
+		return FALSE ;
+
+	c = lgetc( curwp->w_dotp, curwp->w_doto) ;
+	return isletter( c) || ( c >= '0' && c <= '9') ;
 }
 
 #if	WORDPRO
@@ -699,14 +674,7 @@ int wordcount(int f, int n)
 		}
 
 		/* and tabulate it */
-		wordflag = (
-#if	PKCODE
-				   (isletter(ch)) ||
-#else
-				   (ch >= 'a' && ch <= 'z') ||
-				   (ch >= 'A' && ch <= 'Z') ||
-#endif
-				   (ch >= '0' && ch <= '9'));
+		wordflag = isletter( ch) || (ch >= '0' && ch <= '9') ;
 		if (wordflag == TRUE && lastword == FALSE)
 			++nwords;
 		lastword = wordflag;