From 58a5c8708db2227182167d938779aaaab0be5c3c Mon Sep 17 00:00:00 2001
From: Neil <neil@sdf.org>
Date: Mon, 12 Dec 2022 21:25:28 -0800
Subject: [PATCH] Mmm, too complex. Just count.

---
 kjv/src/kjv.h      |  5 +++++
 kjv/src/kjv.re_c.c | 37 +++++++++++++++++++++++++++---
 kjv/src/main.c     | 56 +++++++++++++++++++++++++++++++---------------
 kjv/src/main.h     |  4 ++++
 src/lex.re_c.c     | 42 +++++++++++++++++++++-------------
 5 files changed, 107 insertions(+), 37 deletions(-)
 create mode 100644 kjv/src/main.h
diff --git a/kjv/src/kjv.h b/kjv/src/kjv.h
index 5d6f6b1..f9c0534 100644
--- a/kjv/src/kjv.h
+++ b/kjv/src/kjv.h
@@ -1,3 +1,5 @@
+#include "main.h"
+
 enum kjv_book {
 Genesis,
 Exodus,
@@ -69,4 +71,7 @@ Revelation,
 
 KJV_BOOK_SIZE };
 
+enum kjv_status { KJV_ERROR, KJV_DONE, KJV_CHAPTER };
+
 int kjv_filename(const char *, enum kjv_book *);
+enum kjv_status kjv_chapter(const char *);
diff --git a/kjv/src/kjv.re_c.c b/kjv/src/kjv.re_c.c
index 549eea4..9f5b83b 100644
--- a/kjv/src/kjv.re_c.c
+++ b/kjv/src/kjv.re_c.c
@@ -8,6 +8,7 @@
 #include <errno.h>
 #include <stdio.h> /* debug */
 
+/** [`s`,`e`) => `n` */
 static int parse_natural(const char *s, const char *const e, unsigned *const n) {
 	unsigned accum = 0;
 	while(s < e) {
@@ -23,18 +24,48 @@ static int parse_natural(const char *s, const char *const e, unsigned *const n)
 /*!re2c /**/
 re2c:yyfill:enable = 0;
 re2c:define:YYCTYPE = char;
-sentinel = "\x00";
-newline = "\n";
 natural = [1-9][0-9]*;
+whitespace = [ \t\v\f];
+word = [^ \t\v\f\n\x00]+;
 */
 
+/** If all of `fn` is "<natural><non-dots>.txt", parses the natural number into
+ `book_no`. @return The result of `parse_natural`, or false on no match. */
 int kjv_filename(const char *fn, unsigned *book_no) {
 	const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
 	assert(fn && book_no);
 	/*!re2c /**/
 	*
 		{ return 0; }
-	@s0 natural @s1 [^.\x00]* ".txt" sentinel
+	@s0 natural @s1 [^.\x00]* ".txt" "\x00"
 		{ return parse_natural(s0, s1, book_no); }
 	*/
 }
+
+#if 0
+
+/*!conditions:re2c*/
+
+enum kjv_status kjv_chapter(const char *YYCURSOR, struct book *const book) {
+	const char *YYMARKER, *s0, *s1;
+	int c = yycinit;
+	/*!re2c /**/
+	re2c:define:YYGETCONDITION = "c";
+	re2c:define:YYSETCONDITION = "c = @@;";
+	*/
+	assert(book);
+	/*YYCURSOR = book;*/
+	return KJV_ERROR;
+scan:
+	/*!re2c /**/
+	<*> * { return KJV_ERROR; }
+	<*> "\x00" { return KJV_DONE; }
+	<line> [^\n\x00]* "\n" { goto scan; }
+	<line> "[" natural ":" natural "]" :=> verse
+	<verse> whitespace+ { goto scan; }
+	<verse> @s0 word @s1 {
+	}
+	*/
+}
+
+#endif
diff --git a/kjv/src/main.c b/kjv/src/main.c
index 10efdce..7c0db18 100644
--- a/kjv/src/main.c
+++ b/kjv/src/main.c
@@ -1,11 +1,5 @@
-/** @license 20xx Neil Edelman, distributed under the terms of the
- [GNU General Public License 3](https://opensource.org/licenses/GPL-3.0).
- @license 20xx Neil Edelman, distributed under the terms of the
- [MIT License](https://opensource.org/licenses/MIT).
-
- This is a standard C file.
-
- @std C89 */
+/** @license 2022 Neil Edelman, distributed under the terms of the
+ [MIT License](https://opensource.org/licenses/MIT). */
 
 #include "kjv.h"
 #include <stdlib.h>
@@ -49,40 +43,66 @@ finally:
 	return success;
 }
 
+#define ARRAY_NAME verse
+#define ARRAY_TYPE struct verse /* Element type, not the generated array. */
+#include "array.h"
+
+struct book { struct char_array backing; struct verse_array chapter; };
+
+int main_new_chapter(struct book *const book) {
+	assert(book);
+	return 0;
+}
+
+struct verse *main_new_verse(struct verse_array *const chapter) {
+	assert(chapter);
+	return verse_array_new(chapter);
+}
+
 int main(void) {
 	const char *const dir_name = "KJV";
+	struct book kjv[KJV_BOOK_SIZE] = { 0 };
 	int success = EXIT_SUCCESS;
 	DIR *dir = 0;
 	struct dirent *de = 0;
-	struct char_array book[KJV_BOOK_SIZE] = { 0 };
 	unsigned i;
 	errno = 0;
 
-	/* Read all files in `dir_name`. */
+	/* Read in the kjv from all files.
+	 fixme: this is lazy; all one object would be best. */
 	if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch;
-	while((de = readdir(dir))) {
+	while((de = readdir(dir))) { /* For all files in directory. */
 		unsigned ordinal;
 		enum kjv_book b;
-		if(!kjv_filename(de->d_name, &ordinal))
+		if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */
 			{ fprintf(stderr, "Ignored <%s>.\n", de->d_name); continue; }
 		printf("<%s> ordinal: %u\n", de->d_name, ordinal);
 		if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
 			{ errno = ERANGE; goto catch; } /* Not in range. */
-		if(book[b = ordinal - 1].data)
+		if(kjv[b = ordinal - 1].backing.data) /* Convert to zero-based. */
 			{ errno = EDOM; goto catch; } /* Duplicate. */
-		if(!append_file(book + b, de->d_name)) goto catch;
+		if(!append_file(&kjv[b].backing, de->d_name)) goto catch;
 	}
 	closedir(dir), de = 0, dir = 0;
-	for(i = 0; i < KJV_BOOK_SIZE; i++)
-		if(!book[i].data) { errno = EDOM; goto catch; } /* Not there. */
 
-	/**/
+	/* Parse the files into chapters. */
+	for(i = 0; i < KJV_BOOK_SIZE; i++) {
+		if(!kjv[i].backing.data) { fprintf(stderr, "Missing book %u.\n", i + 1);
+			errno = EDOM; goto catch; }
+		/*for( ; ; ) { switch(kjv_chapter(kjv + i)) {
+		case KJV_ERROR: goto catch;
+		case KJV_DONE: goto finally;
+		case KJV_CHAPTER: break;
+		}}*/
+	}
+
 	goto finally;
 catch:
 	success = EXIT_FAILURE;
 	perror(de ? de->d_name : dir_name);
 	if(dir && closedir(dir)) perror(dir_name);
 finally:
-	for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(&book[i]);
+	for(i = 0; i < KJV_BOOK_SIZE; i++)
+		char_array_(&kjv[i].backing), verse_array_(&kjv[i].chapter);
 	return success;
 }
diff --git a/kjv/src/main.h b/kjv/src/main.h
new file mode 100644
index 0000000..2aed449
--- /dev/null
+++ b/kjv/src/main.h
@@ -0,0 +1,4 @@
+#include <stddef.h>
+struct verse { const char *s, *e; size_t words; };
+struct verse_array;
+struct verse *main_new_verse(struct verse_array *);
diff --git a/src/lex.re_c.c b/src/lex.re_c.c
index 39528d1..914412e 100644
--- a/src/lex.re_c.c
+++ b/src/lex.re_c.c
@@ -131,10 +131,10 @@ int lex_next(struct lex *const x) {
 	re2c:define:YYSETCONDITION = 'scan.condition = @@;';
 	re2c:define:YYSETCONDITION:naked = 1;
 	sentinel = "\x00";
-	illegal = [\x01-\x08\x0a-\x1f\x7f]; /* unix-style control characters */
 	newline = "\n";
+	unix_control = [\x01-\x08\x0a-\x1f\x7f];
 	ws = [ \t];
-	glyph = [^] \ (sentinel | illegal | newline | ws);
+	glyph = [^] \ (sentinel | unix_control | newline | ws);
 	keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
 	decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
 	natural = [1-9][0-9]*;
@@ -148,29 +148,20 @@ int lex_next(struct lex *const x) {
 	x->s0 = x->s1 = 0;
 scan:
 	/*!re2c /**/
-	<*> illegal { return x->symbol = ILLEGAL, 0; }
+	<*> unix_control { return x->symbol = ILLEGAL, 0; }
 	<*> * { return x->symbol = SYNTAX, 0; }
-	<*> sentinel
+	<*> sentinel /* End of input is only legal at the start of a line (`line`). */
 		{ return x->symbol = scan.condition == yycline ? END : ILLEGAL, 0; }
 	<expect_line> newline => line { x->line = ++scan.line; goto scan; }
-	/* This is lazy! break them up into separate words. */
-	<expect_caption> ws* @s0 glyph (glyph | ws)* @s1 ws* / newline
-		=> expect_line
-		{ x->s0 = s0, x->s1 = s1; return x->symbol = CAPTION, 1; }
-	/* Recognized symbols that go at the beginning of a line. */
-	<line> newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; }
+	/* Symbols that go at the beginning of a line. */
+	<line> newline { x->line = ++scan.line; goto scan; }
 	<line> "[" :=> edict
 	<line> "--" :=> source
 	<line> "->" :=> location
-	<line> "*" ws => text
-		{ return x->symbol = LIST_ITEM, 1; }
-	<line> @s0 natural @s1 "." ws => text
-		{ x->s0 = s0, x->s1 = s1; return x->symbol = ORDERED_LIST_ITEM, 1; }
+	<line> * :=> text /* NOTE(review): `*` consumes one code unit before `text` starts; confirm the first character isn't needed. */
 	<line> "!" => text { return x->symbol = COMPLETE, 1; }
 	<line> "^" => text { return x->symbol = CANCELLED, 1; }
 	<line> "#" => text { return x->symbol = HEADING, 1; }
-	/* Just plain text. */
-	<line> ws* / glyph :=> text /* Match-empty-string: text takes care of it. */
 
 	<text> newline => line { x->line = ++scan.line; goto scan; }
 	<text, bible> ws+ { goto scan; }
@@ -219,6 +210,25 @@ scan:
 	<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
 		{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
 
+	<edict> "source" :=> source
+	<edict> "ed" :=> ed
+	<edict> "contact" :=> contact
+	<edict> "glider" :=> glider
+	<edict> "flight" :=> flight
+	<edict> "bible" :=> bible
+	<edict> "book" :=> book
+	<edict> "movie" :=> movie
+	<edict> "tv" :=> tv
+	<edict> "medication" :=> medication
+	<edict> "idea" :=> idea
+	<edict> "vaccine" :=> vaccine
+	<edict> "in" :=> in
+	<edict> "" / natural :=> significant
+	<edict> [0-1][0-9] "-" [0-3][0-9]
+		", " [0-2][0-9] ":" [0-5][0-9] "] "
+		:=> text /* This is likely WhatsApp conversations. Ignore. */
+	/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */
+
 	/* How did it get into my journal? */
 	<edict> "source"
 		{ if(scan.is_ws_expected || scan.edict.size)