From 58a5c8708db2227182167d938779aaaab0be5c3c Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 12 Dec 2022 21:25:28 -0800 Subject: [PATCH] Mmm, too complex. Just count. --- kjv/src/kjv.h | 5 +++++ kjv/src/kjv.re_c.c | 37 +++++++++++++++++++++++++++--- kjv/src/main.c | 56 +++++++++++++++++++++++++++++++--------------- kjv/src/main.h | 4 ++++ src/lex.re_c.c | 42 +++++++++++++++++++++------------- 5 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 kjv/src/main.h diff --git a/kjv/src/kjv.h b/kjv/src/kjv.h index 5d6f6b1..f9c0534 100644 --- a/kjv/src/kjv.h +++ b/kjv/src/kjv.h @@ -1,3 +1,5 @@ +#include "main.h" + enum kjv_book { Genesis, Exodus, @@ -69,4 +71,7 @@ Revelation, KJV_BOOK_SIZE }; +enum kjv_status { KJV_ERROR, KJV_DONE, KJV_CHAPTER }; + int kjv_filename(const char *, enum kjv_book *); +enum kjv_status kjv_chapter(const char *); diff --git a/kjv/src/kjv.re_c.c b/kjv/src/kjv.re_c.c index 549eea4..9f5b83b 100644 --- a/kjv/src/kjv.re_c.c +++ b/kjv/src/kjv.re_c.c @@ -8,6 +8,7 @@ #include #include /* debug */ +/** [`s`,`e`) => `n` */ static int parse_natural(const char *s, const char *const e, unsigned *const n) { unsigned accum = 0; while(s < e) { @@ -23,18 +24,48 @@ static int parse_natural(const char *s, const char *const e, unsigned *const n) /*!re2c /**/ re2c:yyfill:enable = 0; re2c:define:YYCTYPE = char; -sentinel = "\x00"; -newline = "\n"; natural = [1-9][0-9]*; +whitespace = [ \t\v\f]; +word = [^ \t\v\f\n\x00]+; */ +/** `fn` contains "[*].txt", sticks that in `book_no`, otherwise + returns false. 
*/ int kjv_filename(const char *fn, unsigned *book_no) { const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1; assert(fn && book_no); /*!re2c /**/ * { return 0; } - @s0 natural @s1 [^.\x00]* ".txt" sentinel + @s0 natural @s1 [^.\x00]* ".txt" "\x00" { return parse_natural(s0, s1, book_no); } */ } + +#if 0 + +/*!conditions:re2c*/ + +enum kjv_status kjv_chapter(const char *YYCURSOR, struct book *const book) { + const char *YYMARKER, *s0, *s1; + int c = yycinit; + /*!re2c /**/ + re2c:define:YYGETCONDITION = "c"; + re2c:define:YYSETCONDITION = "c = @@;"; + */ + assert(book); + /*YYCURSOR = book;*/ + return KJV_ERROR; +scan: + /*!re2c /**/ + <*> * { return KJV_ERROR; } + <*> "\x00" { return KJV_DONE; } + [^\n\x00]* "\n" { goto scan; } + "[" natural ":" natural "]" :=> verse + whitespace+ { goto scan; } + @s0 word @s1 { + } + */ +} + +#endif diff --git a/kjv/src/main.c b/kjv/src/main.c index 10efdce..7c0db18 100644 --- a/kjv/src/main.c +++ b/kjv/src/main.c @@ -1,11 +1,5 @@ -/** @license 20xx Neil Edelman, distributed under the terms of the - [GNU General Public License 3](https://opensource.org/licenses/GPL-3.0). - @license 20xx Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). - - This is a standard C file. - - @std C89 */ +/** @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). 
*/ #include "kjv.h" #include @@ -49,40 +43,66 @@ finally: return success; } +#define ARRAY_NAME verse +#define ARRAY_TYPE struct verse /* Element type: `verse_array_new` must yield `struct verse *`. */ +#include "array.h" + +struct book { struct char_array backing; struct verse_array chapter; }; + +int main_new_chapter(struct book *const book) { + assert(book); + return 0; +} + +struct verse *main_new_verse(struct verse_array *const chapter) { + assert(chapter); + return verse_array_new(chapter); +} + int main(void) { const char *const dir_name = "KJV"; + struct book kjv[KJV_BOOK_SIZE] = { 0 }; int success = EXIT_SUCCESS; DIR *dir = 0; struct dirent *de = 0; - struct char_array book[KJV_BOOK_SIZE] = { 0 }; unsigned i; errno = 0; - /* Read all files in `dir_name`. */ + /* Read in the kjv from all files. + fixme: this is lazy; all one object would be best. */ if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { + while((de = readdir(dir))) { /* For all files in directory. */ unsigned ordinal; enum kjv_book b; - if(!kjv_filename(de->d_name, &ordinal)) + if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */ { fprintf(stderr, "Ignored <%s>.\n", de->d_name); continue; } printf("<%s> ordinal: %u\n", de->d_name, ordinal); if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) { errno = ERANGE; goto catch; } /* Not in range. */ - if(book[b = ordinal - 1].data) + if(kjv[b = ordinal - 1].backing.data) /* Convert to zero-based. */ { errno = EDOM; goto catch; } /* Duplicate. */ - if(!append_file(book + b, de->d_name)) goto catch; + if(!append_file(&kjv[b].backing, de->d_name)) goto catch; } closedir(dir), de = 0, dir = 0; - for(i = 0; i < KJV_BOOK_SIZE; i++) - if(!book[i].data) { errno = EDOM; goto catch; } /* Not there. */ - /**/ + /* Parse the files into chapters.
*/ + for(i = 0; i < KJV_BOOK_SIZE; i++) { + if(!kjv[i].backing.data) { fprintf(stderr, "Missing book %u.\n", i + 1); + errno = EDOM; goto catch; } + /*for( ; ; ) { switch(kjv_chapter(kjv + i)) { + case KJV_ERROR: goto catch; + case KJV_DONE: goto finally; + case KJV_CHAPTER: break; + }}*/ + } + goto finally; catch: success = EXIT_FAILURE; perror(de ? de->d_name : dir_name); if(dir && closedir(dir)) perror(dir_name); finally: - for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(&book[i]); + for(i = 0; i < KJV_BOOK_SIZE; i++) + char_array_(&kjv[i].backing), verse_array_(&kjv[i].chapter); return success; } diff --git a/kjv/src/main.h b/kjv/src/main.h new file mode 100644 index 0000000..2aed449 --- /dev/null +++ b/kjv/src/main.h @@ -0,0 +1,4 @@ +#include <stddef.h> /* size_t */ +struct verse { const char *s, *e; size_t words; }; +struct verse_array; +struct verse *main_new_verse(struct verse_array *); diff --git a/src/lex.re_c.c b/src/lex.re_c.c index 39528d1..914412e 100644 --- a/src/lex.re_c.c +++ b/src/lex.re_c.c @@ -131,10 +131,10 @@ int lex_next(struct lex *const x) { re2c:define:YYSETCONDITION = 'scan.condition = @@;'; re2c:define:YYSETCONDITION:naked = 1; sentinel = "\x00"; - illegal = [\x01-\x08\x0a-\x1f\x7f]; /* unix-style control characters */ newline = "\n"; + unix_control = [\x01-\x08\x0a-\x1f\x7f]; ws = [ \t]; - glyph = [^] \ (sentinel | illegal | newline | ws); + glyph = [^] \ (sentinel | unix_control | newline | ws); keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*; decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; natural = [1-9][0-9]*; @@ -148,29 +148,20 @@ int lex_next(struct lex *const x) { x->s0 = x->s1 = 0; scan: /*!re2c /**/ - <*> illegal { return x->symbol = ILLEGAL, 0; } + <*> unix_control { return x->symbol = ILLEGAL, 0; } <*> * { return x->symbol = SYNTAX, 0; } - <*> sentinel + <*> sentinel /* New line always delimits. */ { return x->symbol = scan.condition == yycline ?
END : ILLEGAL, 0; } newline => line { x->line = ++scan.line; goto scan; } - /* This is lazy! break them up into separate words. */ - ws* @s0 glyph (glyph | ws)* @s1 ws* / newline - => expect_line - { x->s0 = s0, x->s1 = s1; return x->symbol = CAPTION, 1; } - /* Recognized symbols that go at the beginning of a line. */ - newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; } + /* Symbols that go at the beginning of a line. */ + newline { x->line = ++scan.line; goto scan; } "[" :=> edict "--" :=> source "->" :=> location - "*" ws => text - { return x->symbol = LIST_ITEM, 1; } - @s0 natural @s1 "." ws => text - { x->s0 = s0, x->s1 = s1; return x->symbol = ORDERED_LIST_ITEM, 1; } + * :=> text "!" => text { return x->symbol = COMPLETE, 1; } "^" => text { return x->symbol = CANCELLED, 1; } "#" => text { return x->symbol = HEADING, 1; } - /* Just plain text. */ - ws* / glyph :=> text /* Match-empty-string: text takes care of it. */ newline => line { x->line = ++scan.line; goto scan; } ws+ { goto scan; } @@ -219,6 +210,25 @@ scan: "(" @s0 decimal "," @s1 decimal ")" => expect_caption { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; } + "source" :=> source + "ed" :=> ed + "contact" :=> contact + "glider" :=> glider + "flight" :=> flight + "bible" :=> bible + "book" :=> book + "movie" :=> movie + "tv" :=> tv + "medication" :=> medication + "idea" :=> idea + "vaccine" :=> vaccine + "in" :=> in + "" / natural :=> significant + [0-1][0-9] "-" [0-3][0-9] + ", " [0-2][0-9] ":" [0-5][0-9] "] " + :=> text /* This is likely WhatsApp conversations. Ignore. */ + /* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */ + /* How did it get into my journal? */ "source" { if(scan.is_ws_expected || scan.edict.size)