From f00ed7f1bf3815262a333535a36c927c6634e27f Mon Sep 17 00:00:00 2001 From: Neil Date: Tue, 13 Dec 2022 00:31:56 -0800 Subject: [PATCH] Simple. Works? --- kjv/src/kjv.h | 77 --------------- kjv/src/kjv.re_c.c | 227 +++++++++++++++++++++++++++++++++++++++++---- kjv/src/main.c | 108 --------------------- kjv/src/main.h | 4 - 4 files changed, 207 insertions(+), 209 deletions(-) delete mode 100644 kjv/src/kjv.h delete mode 100644 kjv/src/main.c delete mode 100644 kjv/src/main.h diff --git a/kjv/src/kjv.h b/kjv/src/kjv.h deleted file mode 100644 index f9c0534..0000000 --- a/kjv/src/kjv.h +++ /dev/null @@ -1,77 +0,0 @@ -#include "main.h" - -enum kjv_book { -Genesis, -Exodus, -Leviticus, -Numbers, -Deuteronomy, -Joshua, -Judges, -Ruth, -ISamuel, -IISamuel, -IKings, -IIKings, -IChronicles, -IIChronicles, -Ezra, -Nehemiah, -Esther, -Job, -Psalms, -Proverbs, -Ecclesiastes, -Song_of_Solomon, -Isaiah, -Jeremiah, -Lamentations, -Ezekiel, -Daniel, -Hosea, -Joel, -Amos, -Obadiah, -Jonah, -Micah, -Nahum, -Habakkuk, -Zephaniah, -Haggai, -Zechariah, -Malachi, - -Matthew, -Mark, -Luke, -John, -Acts, -Romans, -ICorinthians, -IICorinthians, -Galatians, -Ephesians, -Philippians, -Colossians, -IThessalonians, -IIThessalonians, -ITimothy, -IITimothy, -Titus, -Philemon, -Hebrews, -James, -IPeter, -IIPeter, -IJohn, -IIJohn, -IIIJohn, -Jude, -Revelation, - -KJV_BOOK_SIZE }; - -enum kjv_status { KJV_ERROR, KJV_DONE, KJV_CHAPTER }; - -int kjv_filename(const char *, enum kjv_book *); -enum kjv_status kjv_chapter(const char *); diff --git a/kjv/src/kjv.re_c.c b/kjv/src/kjv.re_c.c index 9f5b83b..8a77cfa 100644 --- a/kjv/src/kjv.re_c.c +++ b/kjv/src/kjv.re_c.c @@ -3,10 +3,48 @@ Is intended to use . 
*/ -#include "../src/kjv.h" +#include +#include #include #include -#include /* debug */ +#include /* opendir readdir closedir */ +#include /* chdir (POSIX) (because I'm lazy) */ + + + +#define ARRAY_NAME char +#define ARRAY_TYPE char +#include "../src/array.h" + +/** Append a text file, `fn`, to `c`, and add a '\0'. + @return Success. A partial read is failure. @throws[fopen, fread, malloc] + @throws[EILSEQ] The text file has embedded nulls. + @throws[EILSEQ] If a failing library call leaves `errno` unset (non-POSIX). */ +static int append_file(struct char_array *c, const char *const fn) { + FILE *fp = 0; + const size_t granularity = 1024; + size_t nread; + char *cursor; + int success = 0; + assert(c && fn); + if(!(fp = fopen(fn, "r"))) goto catch; + /* Read entire file in chunks. */ + do if(!(cursor = char_array_buffer(c, granularity)) + || (nread = fread(cursor, 1, granularity, fp), ferror(fp)) + || !char_array_append(c, nread)) goto catch; + while(nread == granularity); + /* File to `C` string. */ + if(!(cursor = char_array_new(c))) goto catch; + *cursor = '\0'; + /* Binary files with embedded '\0' are not allowed. */ + if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; } + { success = 1; goto finally; } +catch: + if(!errno) errno = EILSEQ; /* Will never be true on POSIX. 
*/ +finally: + if(fp) fclose(fp); + return success; +} /** [`s`,`e`) => `n` */ static int parse_natural(const char *s, const char *const e, unsigned *const n) { @@ -21,6 +59,86 @@ static int parse_natural(const char *s, const char *const e, unsigned *const n) return 1; } + + +#define BOOKS \ + X(Genesis),\ + X(Exodus),\ + X(Leviticus),\ + X(Numbers),\ + X(Deuteronomy),\ + X(Joshua),\ + X(Judges),\ + X(Ruth),\ + X(ISamuel),\ + X(IISamuel),\ + X(IKings),\ + X(IIKings),\ + X(IChronicles),\ + X(IIChronicles),\ + X(Ezra),\ + X(Nehemiah),\ + X(Esther),\ + X(Job),\ + X(Psalms),\ + X(Proverbs),\ + X(Ecclesiastes),\ + X(Song_of_Solomon),\ + X(Isaiah),\ + X(Jeremiah),\ + X(Lamentations),\ + X(Ezekiel),\ + X(Daniel),\ + X(Hosea),\ + X(Joel),\ + X(Amos),\ + X(Obadiah),\ + X(Jonah),\ + X(Micah),\ + X(Nahum),\ + X(Habakkuk),\ + X(Zephaniah),\ + X(Haggai),\ + X(Zechariah),\ + X(Malachi),\ + \ + X(Matthew),\ + X(Mark),\ + X(Luke),\ + X(John),\ + X(Acts),\ + X(Romans),\ + X(ICorinthians),\ + X(IICorinthians),\ + X(Galatians),\ + X(Ephesians),\ + X(Philippians),\ + X(Colossians),\ + X(IThessalonians),\ + X(IIThessalonians),\ + X(ITimothy),\ + X(IITimothy),\ + X(Titus),\ + X(Philemon),\ + X(Hebrews),\ + X(James),\ + X(IPeter),\ + X(IIPeter),\ + X(IJohn),\ + X(IIJohn),\ + X(IIIJohn),\ + X(Jude),\ + X(Revelation),\ + X(KJV_BOOK_SIZE) + +#define X(book) book +enum kjv_book { BOOKS }; +#undef X +#define X(book) #book +static const char *kjv_book_string[] = { BOOKS }; +#undef X +#undef BOOKS + /*!re2c /**/ re2c:yyfill:enable = 0; re2c:define:YYCTYPE = char; @@ -31,7 +149,7 @@ word = [^ \t\v\f\n\x00]+; /** `fn` contains "[*].txt", sticks that in `book_no`, otherwise returns false. 
*/ -int kjv_filename(const char *fn, unsigned *book_no) { +static int kjv_filename(const char *fn, unsigned *const book_no) { const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1; assert(fn && book_no); /*!re2c /**/ @@ -42,30 +160,99 @@ int kjv_filename(const char *fn, unsigned *book_no) { */ } -#if 0 +struct lex { + size_t line; + const char *cursor; + int error; + unsigned chapter, verse, words; +}; + +static struct lex lex(const char *cursor) { + struct lex lex; + assert(cursor); + lex.line = 1; + lex.cursor = cursor; + lex.error = 0; + lex.chapter = lex.verse = lex.words = 0; + return lex; +} /*!conditions:re2c*/ -enum kjv_status kjv_chapter(const char *YYCURSOR, struct book *const book) { - const char *YYMARKER, *s0, *s1; - int c = yycinit; +static int lex_next_verse(struct lex *const lex) { + const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1; + enum YYCONDTYPE condition = yycline; /*!re2c /**/ - re2c:define:YYGETCONDITION = "c"; - re2c:define:YYSETCONDITION = "c = @@;"; - */ - assert(book); - /*YYCURSOR = book;*/ - return KJV_ERROR; + re2c:define:YYCURSOR = lex->cursor; + re2c:define:YYGETCONDITION = "condition"; + re2c:define:YYSETCONDITION = "condition = @@;"; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION:naked = 1; */ + assert(lex && lex->cursor); + lex->error = 0; scan: /*!re2c /**/ - <*> * { return KJV_ERROR; } - <*> "\x00" { return KJV_DONE; } - [^\n\x00]* "\n" { goto scan; } - "[" natural ":" natural "]" :=> verse - whitespace+ { goto scan; } - @s0 word @s1 { + <*> * { printf("catch\n"); return errno = EILSEQ, lex->error = 1, 0; } + [^[\]\n\x00]* "\n" { printf("comment\n"); lex->line++; goto scan; } + "\x00" { printf("eof\n"); return 0; } + "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse { + if(!parse_natural(s0, s1, &lex->chapter) + || !parse_natural(t0, t1, &lex->verse)) + return errno = EILSEQ, lex->error = 1, 0; + lex->words = 0; + printf("%u:%u", lex->chapter, lex->verse); + goto scan; } + 
whitespace+ { goto scan; } + @s0 word @s1 { lex->words++; goto scan; } + "\n" { printf(" -> %u\n", lex->words); lex->line++; return 1; } */ } -#endif +int main(void) { + const char *const dir_name = "KJV"; + struct char_array kjv[KJV_BOOK_SIZE] = { 0 }; + int success = EXIT_SUCCESS; + DIR *dir = 0; + struct dirent *de = 0; + unsigned i; + size_t words = 0; + errno = 0; + + /* Read in the kjv from all files. This is overkill, we don't need to keep + all the data, just count. Maybe we'll do something else later? */ + if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch; + while((de = readdir(dir))) { /* For all files in directory. */ + unsigned ordinal; + enum kjv_book b; + if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */ + { fprintf(stderr, "Ignored <%s>.\n", de->d_name); continue; } + fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal); + if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) + { errno = ERANGE; goto catch; } /* Not in range. */ + if(kjv[b = ordinal - 1].data) /* Convert to zero-based. */ + { errno = EDOM; goto catch; } /* Duplicate. */ + if(!append_file(kjv + b, de->d_name)) goto catch; + } + closedir(dir), de = 0, dir = 0; + + /* Parse. */ + for(i = 0; i < KJV_BOOK_SIZE; i++) { + struct lex x = lex(kjv[i].data); + if(!x.cursor) { fprintf(stderr, "Missing book %s.\n", + kjv_book_string[i]); errno = EDOM; goto catch; } + printf("%s: cumulative %zu.\n", kjv_book_string[i], words); + while(lex_next_verse(&x)) words += x.words; + if(x.error) goto catch; + } + + printf("kjv: %zu words\n", words); + goto finally; +catch: + success = EXIT_FAILURE; + perror(de ? 
de->d_name : dir_name); + if(dir && closedir(dir)) perror(dir_name); +finally: + for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(kjv + i); + return success; +} diff --git a/kjv/src/main.c b/kjv/src/main.c deleted file mode 100644 index 7c0db18..0000000 --- a/kjv/src/main.c +++ /dev/null @@ -1,108 +0,0 @@ -/** @license 2022 Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). */ - -#include "kjv.h" -#include -#include -#include -#include -#include /* opendir readdir closedir */ -#include /* chdir (POSIX) (because I'm lazy) */ - -#define ARRAY_NAME char -#define ARRAY_TYPE char -#include "array.h" - -/** Append a text file, `fn`, to `c`, and add a '\0'. - @return Success. A partial read is failure. @throws[fopen, fread, malloc] - @throws[EISEQ] The text file has embedded nulls. - @throws[ERANGE] If the standard library does not follow POSIX. */ -static int append_file(struct char_array *c, const char *const fn) { - FILE *fp = 0; - const size_t granularity = 1024; - size_t nread; - char *cursor; - int success = 0; - assert(c && fn); - if(!(fp = fopen(fn, "r"))) goto catch; - /* Read entire file in chunks. */ - do if(!(cursor = char_array_buffer(c, granularity)) - || (nread = fread(cursor, 1, granularity, fp), ferror(fp)) - || !char_array_append(c, nread)) goto catch; - while(nread == granularity); - /* File to `C` string. */ - if(!(cursor = char_array_new(c))) goto catch; - *cursor = '\0'; - /* Binary files with embedded '\0' are not allowed. */ - if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; } - { success = 1; goto finally; } -catch: - if(!errno) errno = EILSEQ; /* Will never be true on POSIX. 
*/ -finally: - if(fp) fclose(fp); - return success; -} - -#define ARRAY_NAME verse -#define ARRAY_TYPE struct verse_array -#include "array.h" - -struct book { struct char_array backing; struct verse_array chapter; }; - -int main_new_chapter(struct book *const book) { - assert(book); - return 0; -} - -struct verse *main_new_verse(struct verse_array *const chapter) { - assert(chapter); - return verse_array_new(chapter); -} - -int main(void) { - const char *const dir_name = "KJV"; - struct book kjv[KJV_BOOK_SIZE] = { 0 }; - int success = EXIT_SUCCESS; - DIR *dir = 0; - struct dirent *de = 0; - unsigned i; - errno = 0; - - /* Read in the kjv from all files. - fixme: this is lazy; all one object would be best. */ - if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { /* For all files in directory. */ - unsigned ordinal; - enum kjv_book b; - if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */ - { fprintf(stderr, "Ignored <%s>.\n", de->d_name); continue; } - printf("<%s> ordinal: %u\n", de->d_name, ordinal); - if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) - { errno = ERANGE; goto catch; } /* Not in range. */ - if(kjv[b = ordinal - 1].backing.data) /* Convert to zero-based. */ - { errno = EDOM; goto catch; } /* Duplicate. */ - if(!append_file(&kjv[b].backing, de->d_name)) goto catch; - } - closedir(dir), de = 0, dir = 0; - - /* Parse the files into chapters. */ - for(i = 0; i < KJV_BOOK_SIZE; i++) { - if(!kjv[i].backing.data) { fprintf(stderr, "Missing book %u.\n", i + 1); - errno = EDOM; goto catch; } - /*for( ; ; ) { switch(kjv_chapter(kjv + i)) { - case KJV_ERROR: goto catch; - case KJV_DONE: goto finally; - case KJV_CHAPTER: break; - }}*/ - } - - goto finally; -catch: - success = EXIT_FAILURE; - perror(de ? 
de->d_name : dir_name); - if(dir && closedir(dir)) perror(dir_name); -finally: - for(i = 0; i < KJV_BOOK_SIZE; i++) - char_array_(&kjv[i].backing), verse_array_(&kjv[i].chapter); - return success; -} diff --git a/kjv/src/main.h b/kjv/src/main.h deleted file mode 100644 index 2aed449..0000000 --- a/kjv/src/main.h +++ /dev/null @@ -1,4 +0,0 @@ -#include -struct verse { const char *s, *e; size_t words; }; -struct verse_array; -struct verse *main_new_verse(struct verse_array *);