Mmm, too complex. Just count.

This commit is contained in:
Neil 2022-12-12 21:25:28 -08:00
parent eb8d42ba3e
commit 58a5c8708d
5 changed files with 107 additions and 37 deletions

View File

@ -1,3 +1,5 @@
#include "main.h"
enum kjv_book {
Genesis,
Exodus,
@ -69,4 +71,7 @@ Revelation,
KJV_BOOK_SIZE };
/** Result of one scanning step. In the scanner visible in this change,
 `KJV_ERROR` is returned by the default rule and `KJV_DONE` on the terminating
 NUL. NOTE(review): `KJV_CHAPTER` presumably signals that a chapter was read;
 nothing visible returns it yet — confirm. */
enum kjv_status { KJV_ERROR, KJV_DONE, KJV_CHAPTER };
/** Extracts the leading book number from a file name.
 NOTE(review): the definition visible in this change takes `unsigned *` as its
 second parameter rather than `enum kjv_book *` — confirm the two agree. */
int kjv_filename(const char *, enum kjv_book *);
/** Scans chapter text. NOTE(review): the only definition visible is disabled
 and takes an additional `struct book *` argument — confirm the intended
 signature. */
enum kjv_status kjv_chapter(const char *);

View File

@ -8,6 +8,7 @@
#include <errno.h>
#include <stdio.h> /* debug */
/** [`s`,`e`) => `n` */
static int parse_natural(const char *s, const char *const e, unsigned *const n) {
unsigned accum = 0;
while(s < e) {
@ -23,18 +24,48 @@ static int parse_natural(const char *s, const char *const e, unsigned *const n)
/* Shared re2c settings and named patterns for the scanners in this file.
 YYFILL is disabled and the input unit is `char`. `natural` is a decimal
 number with no leading zero; `whitespace` excludes the newline; `word` is a
 non-empty run of characters that are neither whitespace, newline, nor NUL. */
/*!re2c /**/
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
sentinel = "\x00";
newline = "\n";
natural = [1-9][0-9]*;
whitespace = [ \t\v\f];
word = [^ \t\v\f\n\x00]+;
*/
/** Recognises a file name of the form "<number>[*].txt": a `natural` number,
 then any characters other than '.' or NUL, then ".txt" ending the string.
 @param[fn] Null-terminated file name; asserted non-null.
 @param[book_no] Receives the leading number through `parse_natural`; asserted
 non-null.
 @return Zero when `fn` does not match; otherwise whatever `parse_natural`
 returns for the digits between the two tags.
 NOTE(review): the two consecutive pattern lines in the scanner below look
 like the before and after of a single rule; presumably only one of them is
 meant to remain — confirm. */
int kjv_filename(const char *fn, unsigned *book_no) {
const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
assert(fn && book_no);
/*!re2c /**/
*
{ return 0; }
@s0 natural @s1 [^.\x00]* ".txt" sentinel
@s0 natural @s1 [^.\x00]* ".txt" "\x00"
{ return parse_natural(s0, s1, book_no); }
*/
}
/* Disabled work in progress: a condition-based scanner meant to walk the text
 of a book. As written it asserts `book`, then returns `KJV_ERROR` before the
 `scan:` label, so the rules are never reached. The rules themselves skip
 whole lines in the `line` condition, switch to `verse` on "[n:n]", skip
 whitespace there, and tag each `word` with `s0`/`s1` but take no action.
 NOTE(review): re2c does not interpret the C preprocessor, so presumably it
 still processes the blocks inside this `#if 0` — confirm the generated file
 is what is intended. */
#if 0
/*!conditions:re2c*/
enum kjv_status kjv_chapter(const char *YYCURSOR, struct book *const book) {
const char *YYMARKER, *s0, *s1;
int c = yycinit;
/*!re2c /**/
re2c:define:YYGETCONDITION = "c";
re2c:define:YYSETCONDITION = "c = @@;";
*/
assert(book);
/*YYCURSOR = book;*/
return KJV_ERROR;
scan:
/*!re2c /**/
<*> * { return KJV_ERROR; }
<*> "\x00" { return KJV_DONE; }
<line> [^\n\x00]* "\n" { goto scan; }
<line> "[" natural ":" natural "]" :=> verse
<verse> whitespace+ { goto scan; }
<verse> @s0 word @s1 {
}
*/
}
#endif

View File

@ -1,11 +1,5 @@
/** @license 20xx Neil Edelman, distributed under the terms of the
[GNU General Public License 3](https://opensource.org/licenses/GPL-3.0).
@license 20xx Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
This is a standard C file.
@std C89 */
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT). */
#include "kjv.h"
#include <stdlib.h>
@ -49,40 +43,66 @@ finally:
return success;
}
/* Instantiates the array template under the name `verse`.
 NOTE(review): `ARRAY_TYPE` is given as `struct verse_array`, yet
 `main_new_verse` returns the result of `verse_array_new` as `struct verse *`;
 presumably the element type should be `struct verse` — confirm against
 array.h. */
#define ARRAY_NAME verse
#define ARRAY_TYPE struct verse_array
#include "array.h"
/** One book: `backing` holds the characters loaded for it and `chapter` is
 the verse array built over them. */
struct book { struct char_array backing; struct verse_array chapter; };
/** Hook for starting a new chapter in `book`. At present it is a stub that
 only checks its argument.
 @param[book] Asserted non-null.
 @return Always zero. */
int main_new_chapter(struct book *const book) {
	const int result = 0;
	assert(book != 0);
	return result;
}
/** Requests one more element from `chapter` by forwarding to
 `verse_array_new`.
 @param[chapter] Asserted non-null.
 @return Whatever `verse_array_new` returns; presumably null on failure —
 confirm against array.h. */
struct verse *main_new_verse(struct verse_array *const chapter) {
	struct verse *fresh;
	assert(chapter != 0);
	fresh = verse_array_new(chapter);
	return fresh;
}
/** Enters the `KJV` directory and, for every entry whose name `kjv_filename`
 accepts, loads that file with `append_file` into the slot given by its
 ordinal. Ordinals outside 1..`KJV_BOOK_SIZE` set `ERANGE`; a repeated or
 missing ordinal sets `EDOM`. All arrays are released before returning.
 NOTE(review): several statements below appear twice in slightly different
 forms (the `readdir` loop header, the `kjv_filename` test, the duplicate
 check and the `append_file` call, on `book` and on `kjv`); this looks like
 the before and after of one change shown together — confirm which version is
 intended before building.
 @return `EXIT_SUCCESS`, or `EXIT_FAILURE` after reporting through `perror`. */
int main(void) {
const char *const dir_name = "KJV";
struct book kjv[KJV_BOOK_SIZE] = { 0 };
int success = EXIT_SUCCESS;
DIR *dir = 0;
struct dirent *de = 0;
struct char_array book[KJV_BOOK_SIZE] = { 0 };
unsigned i;
errno = 0;
/* Read all files in `dir_name`. */
/* Read in the kjv from all files.
fixme: this is lazy; all one object would be best. */
if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
while((de = readdir(dir))) { /* For all files in directory. */
unsigned ordinal;
enum kjv_book b;
if(!kjv_filename(de->d_name, &ordinal))
if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */
{ fprintf(stderr, "Ignored <%s>.\n", de->d_name); continue; }
printf("<%s> ordinal: %u\n", de->d_name, ordinal);
if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
{ errno = ERANGE; goto catch; } /* Not in range. */
if(book[b = ordinal - 1].data)
if(kjv[b = ordinal - 1].backing.data) /* Convert to zero-based. */
{ errno = EDOM; goto catch; } /* Duplicate. */
if(!append_file(book + b, de->d_name)) goto catch;
if(!append_file(&kjv[b].backing, de->d_name)) goto catch;
}
/* Clearing `de` and `dir` here keeps the `catch` path from naming a stale
 entry or closing the directory a second time. */
closedir(dir), de = 0, dir = 0;
for(i = 0; i < KJV_BOOK_SIZE; i++)
if(!book[i].data) { errno = EDOM; goto catch; } /* Not there. */
/**/
/* Parse the files into chapters. */
for(i = 0; i < KJV_BOOK_SIZE; i++) {
if(!kjv[i].backing.data) { fprintf(stderr, "Missing book %u.\n", i + 1);
errno = EDOM; goto catch; }
/*for( ; ; ) { switch(kjv_chapter(kjv + i)) {
case KJV_ERROR: goto catch;
case KJV_DONE: goto finally;
case KJV_CHAPTER: break;
}}*/
}
goto finally;
catch:
success = EXIT_FAILURE;
/* Report against the current file name when one is in hand, otherwise
 against the directory. */
perror(de ? de->d_name : dir_name);
if(dir && closedir(dir)) perror(dir_name);
finally:
for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(&book[i]);
for(i = 0; i < KJV_BOOK_SIZE; i++)
char_array_(&kjv[i].backing), verse_array_(&kjv[i].chapter);
return success;
}

4
kjv/src/main.h Normal file
View File

@ -0,0 +1,4 @@
#ifndef MAIN_H
#define MAIN_H
/* Guarded because this header defines `struct verse`; including it twice in
 one translation unit would otherwise redefine the struct. */

#include <stddef.h>

/** One verse: `s` and `e` delimit its text and `words` is a count.
 NOTE(review): presumably the half-open range [`s`, `e`) and the number of
 words in it — confirm against the scanner. */
struct verse { const char *s, *e; size_t words; };
/* Opaque in this header; only pointers to it are used here. */
struct verse_array;
/** Obtains a new verse slot from the given array; see the definition for the
 failure behaviour. */
struct verse *main_new_verse(struct verse_array *);

#endif /* MAIN_H */

View File

@ -131,10 +131,10 @@ int lex_next(struct lex *const x) {
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
sentinel = "\x00";
illegal = [\x01-\x08\x0a-\x1f\x7f]; /* unix-style control characters */
newline = "\n";
unix_control = [\x01-\x08\x0a-\x1f\x7f];
ws = [ \t];
glyph = [^] \ (sentinel | illegal | newline | ws);
glyph = [^] \ (sentinel | unix_control | newline | ws);
keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
natural = [1-9][0-9]*;
@ -148,29 +148,20 @@ int lex_next(struct lex *const x) {
x->s0 = x->s1 = 0;
scan:
/*!re2c /**/
<*> illegal { return x->symbol = ILLEGAL, 0; }
<*> unix_control { return x->symbol = ILLEGAL, 0; }
<*> * { return x->symbol = SYNTAX, 0; }
<*> sentinel
<*> sentinel /* New line always delimits. */
{ return x->symbol = scan.condition == yycline ? END : ILLEGAL, 0; }
<expect_line> newline => line { x->line = ++scan.line; goto scan; }
/* This is lazy! break them up into separate words. */
<expect_caption> ws* @s0 glyph (glyph | ws)* @s1 ws* / newline
=> expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = CAPTION, 1; }
/* Recognized symbols that go at the beginning of a line. */
<line> newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; }
/* Symbols that go at the beginning of a line. */
<line> newline { x->line = ++scan.line; goto scan; }
<line> "[" :=> edict
<line> "--" :=> source
<line> "->" :=> location
<line> "*" ws => text
{ return x->symbol = LIST_ITEM, 1; }
<line> @s0 natural @s1 "." ws => text
{ x->s0 = s0, x->s1 = s1; return x->symbol = ORDERED_LIST_ITEM, 1; }
<line> * :=> text
<line> "!" => text { return x->symbol = COMPLETE, 1; }
<line> "^" => text { return x->symbol = CANCELLED, 1; }
<line> "#" => text { return x->symbol = HEADING, 1; }
/* Just plain text. */
<line> ws* / glyph :=> text /* Match-empty-string: text takes care of it. */
<text> newline => line { x->line = ++scan.line; goto scan; }
<text, bible> ws+ { goto scan; }
@ -219,6 +210,25 @@ scan:
<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
<edict> "source" :=> source
<edict> "ed" :=> ed
<edict> "contact" :=> contact
<edict> "glider" :=> glider
<edict> "flight" :=> flight
<edict> "bible" :=> bible
<edict> "book" :=> book
<edict> "movie" :=> movie
<edict> "tv" :=> tv
<edict> "medication" :=> medication
<edict> "idea" :=> idea
<edict> "vaccine" :=> vaccine
<edict> "in" :=> in
<edict> "" / natural :=> significant
<edict> [0-1][0-9] "-" [0-3][0-9]
", " [0-2][0-9] ":" [0-5][0-9] "] "
:=> text /* This is likely WhatsApp conversations. Ignore. */
/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */
/* How did it get into my journal? */
<edict> "source"
{ if(scan.is_ws_expected || scan.edict.size)