diff --git a/Makefile b/Makefile index 6bd1880..034a790 100644 --- a/Makefile +++ b/Makefile @@ -39,9 +39,9 @@ default: $(projects) bin/test-text: build/text.o build/test_text.o bin/test-journal: build/text.o build/journal.o build/test_journal.o bin/test-source: build/text.o build/pair.o build/journal.o build/source.o build/test_source.o -bin/test-kjv: build/text.o build/pair.o build/kjv.o build/test_kjv.o -bin/kjv: build/text.o build/pair.o build/journal.o build/kjv.o build/scan_kjv.o -bin/flight: build/text.o build/pair.o build/journal.o build/source.o build/flight.o build/flighthours.o +bin/test-kjv: build/text.o build/pair.o build/kjvcount.o build/test_kjv.o +bin/kjv: build/text.o build/pair.o build/journal.o build/kjvcount.o build/kjv.o +bin/flight: build/text.o build/pair.o build/journal.o build/source.o build/flights.o build/flighthours.o bin/%: @echo "\033[1;36mlinking $@\033[0m" diff --git a/src/flighthours.c b/src/flighthours.c index b066dd4..75051b6 100644 --- a/src/flighthours.c +++ b/src/flighthours.c @@ -4,7 +4,7 @@ Date _vs_ hours flown. */ #include "journal.h" -#include "flight.h" +#include "flights.h" #include "source.h" #include #include diff --git a/src/flight.h b/src/flights.h similarity index 89% rename from src/flight.h rename to src/flights.h index 01b4371..c0da942 100644 --- a/src/flight.h +++ b/src/flights.h @@ -1,5 +1,5 @@ #if defined BASE \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ -#if defined GENERIC \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ -#if defined GENERIC \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ -#if defined GENERIC \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ +struct kjvline_tree_iterator { struct tree_kjvline_iterator _; }; +#endif /* private --> */ #if defined PROTO \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ #ifdef BASE #undef BASE #endif -#ifdef GENERIC -#undef GENERIC +#ifdef PRIVATE +#undef PRIVATE #endif #ifdef PROTO #undef PROTO diff --git a/src/kjv.re.c b/src/kjv.re.c index 3a558aa..d0c3c11 100644 --- a/src/kjv.re.c +++ b/src/kjv.re.c @@ -1,244 +1,314 @@ -/** Run with a `KJV` sub-directory, - , outputs a - `gperf` file that has all the the words of all the verses in `kjv.h` format. - @license 2022 Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). Uses the KJV at - [bible databases](https://github.com/scrollmapper/bible_databases/tree/master), - "All included Bible translations are in the public domain." - @std C11 */ +/** @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). + Scan journal entries for kjv references. */ + +#include "../src/journal.h" #define BASE -#include "../src/kjv.h" /* Just the base data. */ -#include "../src/text.h" +#include "../src/kjv.h" #include "../src/pair.h" -#include -#include +#include /* C99 */ #include +#include +#include #include -#include /* opendir readdir closedir */ -#include /* chdir (POSIX) (because I'm lazy) */ -/* #include No; overkill. */ +#include -void kjvcite_to_string(const union kjvcite x, char (*const a)[12]) - { sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, - kjv_book_string[x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE], - x.chapter % 1000, x.verse % 1000); } - -/* Reversible hash map. */ -/** - on `x`. */ -static uint32_t lowbias32(uint32_t x) { - x ^= x >> 16; - x *= 0x7feb352dU; - x ^= x >> 15; - x *= 0x846ca68bU; - x ^= x >> 16; - return x; -} -/* Inverts `x`. */ -static uint32_t lowbias32_r(uint32_t x) { - x ^= x >> 16; - x *= 0x43021123U; - x ^= x >> 15 ^ x >> 30; - x *= 0x1d69e2a5U; - x ^= x >> 16; - return x; -} - -static uint32_t kjvset_hash(const union kjvcite x) { return lowbias32(x.u32); } -static union kjvcite kjvset_unhash(const uint32_t x) - { union kjvcite k; k.u32 = lowbias32_r(x); return k; } -static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) - { kjvcite_to_string(x, a); } -#define TABLE_NAME kjvset -#define TABLE_KEY union kjvcite -#define TABLE_UINT uint32_t -#define TABLE_UNHASH -#define TABLE_TO_STRING -#include "../src/table.h" - -/* Derived information on verse word count. */ -static uint32_t verse_hash(const union kjvcite x) { return kjvset_hash(x); } -static union kjvcite verse_unhash(const uint32_t x) { return kjvset_unhash(x); } -static void verse_to_string(const union kjvcite x, const unsigned count, - char (*const a)[12]) { (void)count; kjvset_to_string(x, a); } -#define TABLE_NAME verse -#define TABLE_KEY union kjvcite -#define TABLE_UINT uint32_t -#define TABLE_VALUE unsigned /* Count words. */ -#define TABLE_UNHASH -#define TABLE_DEFAULT 0 -#define TABLE_TO_STRING -#include "../src/table.h" - - -/* Parse filename of books. */ -/*!re2c /**/ -re2c:yyfill:enable = 0; -re2c:define:YYCTYPE = char; -natural = [1-9][0-9]*; -whitespace = [ \t\v\f]; -word = [^ \t\v\f\n\x00]+; -*/ -/** `fn` contains "[*].txt", sticks that in `book_no`, otherwise - returns false. */ -static int looks_like_book_fn(const char *fn, unsigned *const book_no) { - const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1; - assert(fn && book_no); - /*!re2c /**/ - * - { return 0; } - @s0 natural @s1 [^.\x00]* ".txt" "\x00" - { return pair_to_natural(s0, s1, book_no); } - */ -} - - -/* This is the contents of the . */ -struct lex { - size_t line; - const char *cursor; - int error; - uint32_t chapter, verse, words; -}; -static struct lex lex(const char *cursor) { - struct lex lex; - assert(cursor); - lex.line = 1; - lex.cursor = cursor; - lex.error = 0; - lex.chapter = lex.verse = lex.words = 0; - return lex; -} -/*!conditions:re2c*/ -static int lex_next_verse(struct lex *const lex) { - const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1; - enum YYCONDTYPE condition = yycline; - /*!re2c /**/ - re2c:define:YYCURSOR = lex->cursor; - re2c:define:YYGETCONDITION = "condition"; - re2c:define:YYSETCONDITION = "condition = @@;"; - re2c:define:YYGETCONDITION:naked = 1; - re2c:define:YYSETCONDITION:naked = 1; */ - assert(lex && lex->cursor); - lex->error = 0; -scan: - /*!re2c /**/ - <*> * { return errno = EILSEQ, lex->error = 1, 0; } - [^[\]\n\x00]* "\n" { lex->line++; goto scan; } - "\x00" { return 0; } - "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse { - if(!pair_to_natural(s0, s1, &lex->chapter) - || !pair_to_natural(t0, t1, &lex->verse)) - return errno = EILSEQ, lex->error = 1, 0; - lex->words = 0; - /*printf("%u:%u", lex->chapter, lex->verse);*/ - goto scan; - } - whitespace+ { goto scan; } - @s0 word @s1 { lex->words++; goto scan; } - "\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; } - */ -} +static void kjvline_to_string(const union line64 line, const struct kjvrange *u, + char (*const a)[12]) { (void)u; date32_to_string(line.date, a); } +static int kjvline_compare(const union line64 a, const union line64 b) + { return a.u64 > b.u64; } +#define TREE_NAME kjvline +#define TREE_KEY union line64 +#define TREE_VALUE struct kjvrange +#define TREE_COMPARE +#define TREE_TO_STRING +#include "../src/tree.h" #define PROTO -#include "../src/kjv.h" /* Just the kjv and prototypes. */ +#include "../src/kjv.h" /* proto */ -/** Frees `kjv`. */ -void kjv_(struct kjv *const kjv) { - if(!kjv) return; - kjvset_table_(&kjv->set); - verse_table_(&kjv->verses); + +/*!conditions:re2c*/ + +static int scan(union date32 date, const char *const buffer, + struct kjvline_tree *const lines) { + const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, *s0, *s1, *t0, *t1; + enum kjv_book book = Revelation; + uint32_t chapter = 0, verse = 0, verse_end = 0; + enum YYCONDTYPE condition = yycline; + size_t line = 1; + const char *why = "unexpected"; + assert(buffer && lines); + YYCURSOR = YYMARKER = yyt1 = buffer; + /*!re2c /**/ + re2c:define:YYCTYPE = char; + re2c:yyfill:enable = 0; + re2c:define:YYGETCONDITION = "condition"; + re2c:define:YYSETCONDITION = "condition = @@;"; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION:naked = 1; + + unix_control = [\x01-\x08\x0b-\x1f\x7f]; + ws = [ \t]; + glyph = [^] \ ("\x00" | "\n" | unix_control | ws); + natural = [1-9][0-9]*; + engage = ws+ "--" ws+; + /* (natural ":")? Don't use for memorizing and use for reading, I think? */ + /*("``"|"\"") This is not in the next book. */ + lookat = ws* natural ":" natural [ab]? ("-" natural [ab]?)? engage; + first = ("I" | "1") " "?; + second = ("II" | "2") " "?; + third = ("III" | "3") " "?; + */ + for( ; ; ) { /*!re2c /**/ + /* Default ignore. */ + [^\n\x00] { continue; } + "\x00" { why = "no newline at end of file"; goto catch; } + "\x00" { return 1; } + "\n" => line { line++; continue; } + * :=> skip + + /* Books. */ + "Genesis" / lookat => book { book = Genesis; continue; } + "Exodus" / lookat => book { book = Exodus; continue; } + "Leviticus" / lookat => book { book = Leviticus; continue; } + "Numbers" / lookat => book { book = Numbers; continue; } + "Deuteronomy" / lookat => book { book = Deuteronomy; continue; } + "Joshua" / lookat => book { book = Joshua; continue; } + "Judges" / lookat => book { book = Judges; continue; } + "Ruth" / lookat => book { book = Ruth; continue; } + first "Samuel" / lookat => book { book = ISamuel; continue; } + second "Samuel" / lookat => book { book = IISamuel; continue; } + first "Kings" / lookat => book { book = IKings; continue; } + second "Kings" / lookat => book { book = IIKings; continue; } + first "Chronicles" / lookat + => book { book = IChronicles; continue; } + second "Chronicles" / lookat + => book { book = IIChronicles; continue; } + "Ezra" / lookat => book { book = Ezra; continue; } + "Nehemiah" / lookat => book { book = Nehemiah; continue; } + "Esther" / lookat => book { book = Esther; continue; } + "Job" / lookat => book { book = Job; continue; } + "Psalms" / lookat => book { book = Psalms; continue; } + "Proverbs" / lookat => book { book = Proverbs; continue; } + "Ecclesiastes" / lookat + => book { book = Ecclesiastes; continue; } + "Song of Solomon" / lookat + => book { book = Song_of_Solomon; continue; } + "Isaiah" / lookat => book { book = Isaiah; continue; } + "Jeremiah" / lookat => book { book = Jeremiah; continue; } + "Lamentations" / lookat + => book { book = Lamentations; continue; } + "Ezekiel" / lookat => book { book = Ezekiel; continue; } + "Daniel" / lookat => book { book = Daniel; continue; } + "Hosea" / lookat => book { book = Hosea; continue; } + "Joel" / lookat => book { book = Joel; continue; } + "Amos" / lookat => book { book = Amos; continue; } + "Obadiah" / lookat => book { book = Obadiah; continue; } + "Jonah" / lookat => book { book = Jonah; continue; } + "Micah" / lookat => book { book = Micah; continue; } + "Nahum" / lookat => book { book = Nahum; continue; } + "Habakkuk" / lookat => book { book = Habakkuk; continue; } + "Zephaniah" / lookat => book { book = Zephaniah; continue; } + "Haggai" / lookat => book { book = Haggai; continue; } + "Zechariah" / lookat => book { book = Zechariah; continue; } + "Malachi" / lookat => book { book = Malachi; continue; } + "Matthew" / lookat => book { book = Matthew; continue; } + "Mark" / lookat => book { book = Mark; continue; } + "Luke" / lookat => book { book = Luke; continue; } + "John" / lookat => book { book = John; continue; } + "Acts" / lookat => book { book = Acts; continue; } + "Romans" / lookat => book { book = Romans; continue; } + first "Corinthians" / lookat + => book { book = ICorinthians; continue; } + second "Corinthians" / lookat + => book { book = IICorinthians; continue; } + "Galatians" / lookat => book { book = Galatians; continue; } + "Ephesians" / lookat => book { book = Ephesians; continue; } + "Philippians" / lookat => book { book = Philippians; continue; } + "Colossians" / lookat => book { book = Colossians; continue; } + first "Thessalonians" / lookat + => book { book = IThessalonians; continue; } + second "Thessalonians" / lookat + => book { book = IIThessalonians; continue; } + first "Timothy" / lookat => book { book = ITimothy; continue; } + second "Timothy" / lookat => book { book = IITimothy; continue; } + "Titus" / lookat => book { book = Titus; continue; } + "Philemon" / lookat => book { book = Philemon; continue; } + "Hebrews" / lookat => book { book = Hebrews; continue; } + "James" / lookat => book { book = James; continue; } + first "Peter" / lookat => book { book = IPeter; continue; } + second "Peter" / lookat => book { book = IIPeter; continue; } + first "John" / lookat => book { book = IJohn; continue; } + second "John" / lookat => book { book = IIJohn; continue; } + third "John" / lookat => book { book = IIIJohn; continue; } + "Jude" / lookat => book { book = Jude; continue; } + "Revelation" / lookat => book { book = Revelation; continue; } + + /* Extract further information. */ + * { why = "default unrecognized"; goto catch; } + /* 19:15a, just ignore the a. */ + ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? { + if(chapter || verse || verse_end) + { why = "reference unrecognized"; goto catch; } + if(!pair_to_natural(s0, s1, &chapter) + || !pair_to_natural(t0, t1, &verse)) + { why = "reference numerical error"; goto catch; } + continue; + } + "-" @s0 natural @s1 [ab]? { /* Verse range. */ + if(!chapter || !verse || verse_end) + { why = "range unrecognized"; goto catch; } + if(!pair_to_natural(s0, s1, &verse_end)) + { why = "range numerical error"; goto catch; } + continue; + } + engage => skip { + if(!chapter || !verse) { why = "missing information"; goto catch; } + if(verse_end && verse_end <= verse) + { why = "interval error"; goto catch; } /* 0 or valid. */ + const union line64 key + = {{ (uint32_t)line, {{ date.day, date.month, date.year }} }}; + struct kjvrange *value; + switch(kjvline_tree_try(lines, key, &value)) { + case TREE_PRESENT: why = "duplicate key"; + case TREE_ERROR: goto catch; + case TREE_ABSENT: + value->start.book = book; + value->start.chapter = chapter; + value->start.verse = verse; + value->verse_end = verse_end; + break; + } + book = Revelation, chapter = 0, verse = 0, verse_end = 0; + continue; + } + */ } + assert(0); /* Never gets here. */ +catch: + if(!errno) errno = EILSEQ; + { + char datestr[12]; + date32_to_string(date, &datestr); + fprintf(stderr, "%s\n" + "%s line %zu: %s.\n", buffer, datestr, line, why); + } + return 0; } -/** Loads 66 files from the "kjv/" directory. Prints out something if it - doesn't work, but does not call `perror` or reset `errno`. Use - to tell. */ -struct kjv kjv(void) { - const char *const dir_kjv = "kjv"; - struct text backing = text(); - struct kjv kjv = { 0 }; - DIR *dir = 0; - struct dirent *de = 0; - struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 }; - enum kjv_book b = 0; - int is_in_kjv = 0; +void kjv_line_(struct kjvline_tree *const f) { kjvline_tree_(f); } - /* For all files in directory KJV with <#>*.txt, read into backing. */ - if(chdir(dir_kjv) == -1 || (is_in_kjv = 1, !(dir = opendir(".")))) - goto catch; - while((de = readdir(dir))) { - unsigned ordinal; - char *unstable_backing; - if(!looks_like_book_fn(de->d_name, &ordinal)) continue; - /*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/ - if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) - { errno = ERANGE; goto catch; } /* Not in range. */ - if(build[b = ordinal - 1].is) /* Convert to zero-based. */ - { errno = EDOM; goto catch; } /* Is duplicate. */ - if(!(unstable_backing = text_append_file(&backing, de->d_name))) - goto catch; - build[b].is = 1; - build[b].offset = (size_t)(unstable_backing - backing.a.data); - } - if(closedir(dir) == -1) { dir = 0; goto catch; } dir = 0; - - /* Now backing is stable; count all the words for each verse. */ - for(b = 0; b < KJV_BOOK_SIZE; b++) { - struct lex x; - if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n", - b + 1, kjv_book_string[b]); errno = EDOM; goto catch; } - x = lex(backing.a.data + build[b].offset); - while(lex_next_verse(&x)) { - const union kjvcite cite - = { .book = b, .chapter = x.chapter, .verse = x.verse }; - unsigned *words; - switch(verse_table_assign(&kjv.verses, cite, &words)) { - case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n", - b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM; - case TABLE_ERROR: goto catch; - case TABLE_ABSENT: break; - } - *words = x.words, kjv.words.total += x.words; - } - if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n", - b + 1, kjv_book_string[b], x.line); goto catch; } - } +struct kjvline_tree kjv_line(struct journal *const j) { + struct kjvline_tree lines; + struct journal_iterator it; + union date32 date; + const char *text; + assert(j); + lines = kjvline_tree(); + it = journal_iterator(j); + while(journal_next(&it, &date, &text)) + if(!scan(date, text, &lines)) goto catch; goto finally; catch: - if(de) fprintf(stderr, "While reading %s/%s.\n", dir_kjv, de->d_name); - else fprintf(stderr, "In directory %s/.\n", dir_kjv); -recatch: - kjv_(&kjv); + kjv_line_(&lines); finally: - if(dir) { if(closedir(dir)) { dir = 0; goto recatch; } dir = 0; } - if(is_in_kjv && (is_in_kjv = 0, chdir("..") == -1)) goto recatch; - text_(&backing); - return kjv; + return lines; } -/** Has loaded properly? Otherwise, probably `errno` is set. */ -int kjv_is_empty(const struct kjv *const kjv) - { return !kjv || !kjv->verses.buckets; } +int kjv_line_is_empty(const struct kjvline_tree *const lines) + { return !lines || !lines->root.node; } -/** Adds `cite` to `kjv` if not present. Only used in test. - @return Is the kjv still valid. */ -int kjv_add(struct kjv *const kjv, const union kjvcite cite) { - if(!kjv) return 0; - kjv->words.verse = verse_table_get(&kjv->verses, cite); - kjv->words.cumulative += kjv->words.verse; - switch(kjvset_table_try(&kjv->set, cite)) { - case TABLE_ERROR: return 0; - case TABLE_ABSENT: kjv->words.set += kjv->words.verse; /* Sic. */ - case TABLE_PRESENT: break; - } +const char *kjv_line_to_string(const struct kjvline_tree *const kl) + { return kjvline_tree_to_string(kl); } + +struct kjvline_tree_iterator kjv_line_iterator(struct kjvline_tree *const kl) + { return kjvline_tree_iterator(kl); } + +int kjv_line_next(struct kjvline_tree_iterator *const it, union line64 *const k, + const struct kjvrange **const v) { + assert(it && k && v); + if(!kjvline_tree_next(it)) return 0; + *k = kjvline_tree_key(it); + *v = kjvline_tree_value(it); return 1; } -const char *kjv_to_string(const struct kjv *const kjv) - { return kjv ? verse_table_to_string(&kjv->verses) : ""; } -const char *kjv_set_to_string(const struct kjv *const kjv) - { return kjv ? kjvset_table_to_string(&kjv->set) : 0; } + + + + +#if 0 +char citestr[12], datestr[12]; +const struct source *src = source_lookup(&s, line); +assert(src); if(!src->name.a) { errno = EDOM; goto catch; } +kjvcite_to_string(cite, &citestr); +for( ; ; cite.verse++) { + if(!kjv_add(kj, cite)) return 0; + if(!verse_end || verse_end <= cite.verse) break; +} +date32_to_string(date, &datestr); +printf("%s\t%zu\t%zu\t%zu\t# ", + datestr, kj->words.verse, kj->words.set, kj->words.cumulative); +if(verse_end) { + printf("%s-%" PRIu32 "\n", citestr, verse_end); +} else { + printf("%s\n", citestr); +} + +#endif + +int main(void) { + int success = EXIT_SUCCESS; + const char *reason = "unknown"; + errno = 0; + + struct kjvcount count = kjvcount(); + if(kjvcount_is_empty(&count)) { reason = "kjv failed to load"; goto catch; } + + struct journal j = journal(); + if(journal_is_empty(&j)) { reason = "journal failed to load"; goto catch; } + fprintf(stderr, "Journal: %s.\n", journal_to_string(&j)); + + struct kjvline_tree lines = kjv_line(&j); + if(kjv_line_is_empty(&lines)) { reason = "parsing failed"; goto catch; } + fprintf(stderr, "Lines: %s.\n", kjv_line_to_string(&lines)); + + struct journal_iterator it; + union date32 k; + const char *v; + + printf("set term postscript eps enhanced\n" + "set output \"kjv.eps\"\n" + "$Data < /* C99 */ +union kjvcite { + struct { uint32_t verse : 12, chapter : 13, book : 7; }; /* C11, reverse */ + uint32_t u32; +}; +struct kjvrange { union kjvcite start; uint32_t verse_end; }; +void kjvcite_to_string(const union kjvcite, char (*)[12]); +#endif /* base --> */ + + +#if defined PRIVATE \ + || !defined BASE && !defined PRIVATE && !defined PROTO /* */ + + +#if defined PROTO \ + || !defined BASE && !defined PRIVATE && !defined PROTO /* */ + + +#ifdef BASE +#undef BASE +#endif +#ifdef PRIVATE +#undef PRIVATE +#endif +#ifdef PROTO +#undef PROTO +#endif diff --git a/src/kjvcount.re.c b/src/kjvcount.re.c new file mode 100644 index 0000000..40989cc --- /dev/null +++ b/src/kjvcount.re.c @@ -0,0 +1,249 @@ +/** Run with a `kjv` sub-directory. Two functionaries: counts all words from + verses + dynamically and puts them into an unchanging `kjvcount_table` on initialisation. + Then has a set to each of the verses, `kjvset`, which starts off empty. + @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). Uses the KJV at + [bible databases](https://github.com/scrollmapper/bible_databases/tree/master), + "All included Bible translations are in the public domain." + @std C11 */ + +#define BASE +#include "../src/kjvcount.h" +#include "../src/text.h" +#include "../src/pair.h" +#include +#include +#include +#include +#include /* opendir readdir closedir */ +#include /* chdir (POSIX) (because I'm lazy) */ +/* #include No; overkill. */ + + +void kjvcite_to_string(const union kjvcite x, char (*const a)[12]) + { sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, + kjv_book_string[x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE], + x.chapter % 1000, x.verse % 1000); } + +/* Reversible hash map. */ +/** + on `x`. */ +static uint32_t lowbias32(uint32_t x) { + x ^= x >> 16; + x *= 0x7feb352dU; + x ^= x >> 15; + x *= 0x846ca68bU; + x ^= x >> 16; + return x; +} +/* Inverts `x`. */ +static uint32_t lowbias32_r(uint32_t x) { + x ^= x >> 16; + x *= 0x43021123U; + x ^= x >> 15 ^ x >> 30; + x *= 0x1d69e2a5U; + x ^= x >> 16; + return x; +} + +/* Set of verses. */ +static uint32_t kjvset_hash(const union kjvcite x) { return lowbias32(x.u32); } +static union kjvcite kjvset_unhash(const uint32_t x) + { union kjvcite k; k.u32 = lowbias32_r(x); return k; } +static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) + { kjvcite_to_string(x, a); } +#define TABLE_NAME kjvset +#define TABLE_KEY union kjvcite +#define TABLE_UINT uint32_t +#define TABLE_UNHASH +#define TABLE_TO_STRING +#include "../src/table.h" + +/* Derived information on verse word count. */ +static uint32_t count_hash(const union kjvcite x) { return kjvset_hash(x); } +static union kjvcite count_unhash(const uint32_t x) { return kjvset_unhash(x); } +static void count_to_string(const union kjvcite x, const unsigned count, + char (*const a)[12]) { (void)count; kjvset_to_string(x, a); } +#define TABLE_NAME count +#define TABLE_KEY union kjvcite +#define TABLE_UINT uint32_t +#define TABLE_VALUE unsigned /* Count words. */ +#define TABLE_UNHASH +#define TABLE_DEFAULT 0 +#define TABLE_TO_STRING +#include "../src/table.h" + + +/* Parse filename of books. */ +/*!re2c /**/ +re2c:yyfill:enable = 0; +re2c:define:YYCTYPE = char; +natural = [1-9][0-9]*; +whitespace = [ \t\v\f]; +word = [^ \t\v\f\n\x00]+; +*/ +/** `fn` contains "[*].txt", sticks that in `book_no`, otherwise + returns false. */ +static int looks_like_book_fn(const char *fn, unsigned *const book_no) { + const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1; + assert(fn && book_no); + /*!re2c /**/ + * + { return 0; } + @s0 natural @s1 [^.\x00]* ".txt" "\x00" + { return pair_to_natural(s0, s1, book_no); } + */ +} + + +/* This is the contents of the . */ +struct lex { + size_t line; + const char *cursor; + int error; + uint32_t chapter, verse, words; +}; +static struct lex lex(const char *cursor) { + struct lex lex; + assert(cursor); + lex.line = 1; + lex.cursor = cursor; + lex.error = 0; + lex.chapter = lex.verse = lex.words = 0; + return lex; +} +/*!conditions:re2c*/ +static int lex_next_verse(struct lex *const lex) { + const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1; + enum YYCONDTYPE condition = yycline; + /*!re2c /**/ + re2c:define:YYCURSOR = lex->cursor; + re2c:define:YYGETCONDITION = "condition"; + re2c:define:YYSETCONDITION = "condition = @@;"; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION:naked = 1; */ + assert(lex && lex->cursor); + lex->error = 0; +scan: + /*!re2c /**/ + <*> * { return errno = EILSEQ, lex->error = 1, 0; } + [^[\]\n\x00]* "\n" { lex->line++; goto scan; } + "\x00" { return 0; } + "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse { + if(!pair_to_natural(s0, s1, &lex->chapter) + || !pair_to_natural(t0, t1, &lex->verse)) + return errno = EILSEQ, lex->error = 1, 0; + lex->words = 0; + /*printf("%u:%u", lex->chapter, lex->verse);*/ + goto scan; + } + whitespace+ { goto scan; } + @s0 word @s1 { lex->words++; goto scan; } + "\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; } + */ +} + + +#define PROTO +#include "../src/kjvcount.h" /* Just the kjv and prototypes. */ + +/** Frees `kjv`. */ +void kjvcount_(struct kjvcount *const count) { + if(!count) return; + count_table_(&count->verses); + count->words.total = count->words.cumulative = count->words.set = 0; +} + +/** Loads 66 files from the "kjv/" directory. Prints out something if it + doesn't work, but does not call `perror` or reset `errno`. Use + to tell. */ +struct kjvcount kjvcount(void) { + const char *const dir_kjv = "kjv"; + struct text backing = text(); + struct kjvcount count = {0}; + DIR *dir = 0; + struct dirent *de = 0; + struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 }; + enum kjv_book b = 0; + int is_in_kjv = 0; + + /* For all files in directory KJV with <#>*.txt, read into backing. */ + if(chdir(dir_kjv) == -1 || (is_in_kjv = 1, !(dir = opendir(".")))) + goto catch; + while((de = readdir(dir))) { + unsigned ordinal; + char *unstable_backing; + if(!looks_like_book_fn(de->d_name, &ordinal)) continue; + /*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/ + if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) + { errno = ERANGE; goto catch; } /* Not in range. */ + if(build[b = ordinal - 1].is) /* Convert to zero-based. */ + { errno = EDOM; goto catch; } /* Is duplicate. */ + if(!(unstable_backing = text_append_file(&backing, de->d_name))) + goto catch; + build[b].is = 1; + build[b].offset = (size_t)(unstable_backing - backing.a.data); + } + if(closedir(dir) == -1) { dir = 0; goto catch; } dir = 0; + + /* Now backing is stable; count all the words for each verse. */ + for(b = 0; b < KJV_BOOK_SIZE; b++) { + struct lex x; + if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n", + b + 1, kjv_book_string[b]); errno = EDOM; goto catch; } + x = lex(backing.a.data + build[b].offset); + while(lex_next_verse(&x)) { + const union kjvcite cite + = { .book = b, .chapter = x.chapter, .verse = x.verse }; + unsigned *words; + switch(count_table_assign(&count.verses, cite, &words)) { + case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n", + b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM; + case TABLE_ERROR: goto catch; + case TABLE_ABSENT: break; + } + *words = x.words, count.words.total += x.words; + } + if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n", + b + 1, kjv_book_string[b], x.line); goto catch; } + } + goto finally; +catch: + if(de) fprintf(stderr, "While reading %s/%s.\n", dir_kjv, de->d_name); + else fprintf(stderr, "In directory %s/.\n", dir_kjv); +recatch: + kjvcount_(&count); +finally: + if(dir) { if(closedir(dir)) { dir = 0; goto recatch; } dir = 0; } + if(is_in_kjv && (is_in_kjv = 0, chdir("..") == -1)) goto recatch; + text_(&backing); + return count; +} + +/** Has loaded properly? Otherwise, probably `errno` is set. */ +int kjvcount_is_empty(const struct kjvcount *const kjv) + { return !kjv || !kjv->verses.buckets; } + +const char *kjvcount_to_string(const struct kjvcount *const count) + { return count ? count_table_to_string(&count->verses) : ""; } + +struct kjvset_table kjv_set(void) { return kjvset_table(); } +void kjv_set_(struct kjvset_table *const set) { kjvset_table_(set); } +const char *kjv_set_to_string(const struct kjvset_table *const set) + { return set ? kjvset_table_to_string(set) : 0; } +/** Adds `cite` to `kjv` if not present. Only used in test. + @return Is the kjv still valid. */ +int kjv_set_add(struct kjvset_table *const set, + struct kjvcount *const count, const union kjvcite cite) { + size_t no_verse; + if(!set || !count) return 0; + no_verse = count_table_get(&count->verses, cite); + count->words.cumulative += no_verse; + switch(kjvset_table_try(set, cite)) { + case TABLE_ERROR: return 0; + case TABLE_ABSENT: count->words.set += no_verse; /* Sic. */ + case TABLE_PRESENT: break; + } + return 1; +} diff --git a/src/scan_kjv.re.c b/src/scan_kjv.re.c deleted file mode 100644 index d8b653d..0000000 --- a/src/scan_kjv.re.c +++ /dev/null @@ -1,244 +0,0 @@ -/** @license 2022 Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). - - Scan journal entries for kjv references. */ - -#include "../src/journal.h" -#include "../src/kjv.h" -#include "../src/pair.h" -#include /* C99 */ -#include -#include -#include -#include -#include - -/* Callback is stupid; separate into tree with line and verse, and then more - processing. */ -typedef int (*scan_callback)(struct kjv *, union line64, union kjvcite, - uint32_t); - -/*!conditions:re2c*/ - -static int scan(union date32 date, const char *const buffer, - const scan_callback callback, struct kjv *const kj) { - const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, *s0, *s1, *t0, *t1; - enum kjv_book book = Revelation; - uint32_t chapter = 0, verse = 0, verse_end = 0; - enum YYCONDTYPE condition = yycline; - size_t line = 1; - const char *why = "unexpected"; - assert(buffer && kj); - YYCURSOR = YYMARKER = yyt1 = buffer; - /*!re2c /**/ - re2c:define:YYCTYPE = char; - re2c:yyfill:enable = 0; - re2c:define:YYGETCONDITION = "condition"; - re2c:define:YYSETCONDITION = "condition = @@;"; - re2c:define:YYGETCONDITION:naked = 1; - re2c:define:YYSETCONDITION:naked = 1; - - unix_control = [\x01-\x08\x0b-\x1f\x7f]; - ws = [ \t]; - glyph = [^] \ ("\x00" | "\n" | unix_control | ws); - natural = [1-9][0-9]*; - engage = ws+ "--" ws+; - /* (natural ":")? Don't use for memorizing and use for reading, I think? */ - /*("``"|"\"") This is not in the next book. */ - lookat = ws* natural ":" natural [ab]? ("-" natural [ab]?)? engage; - first = ("I" | "1") " "?; - second = ("II" | "2") " "?; - third = ("III" | "3") " "?; - */ - for( ; ; ) { /*!re2c /**/ - /* Default ignore. */ - [^\n\x00] { continue; } - "\x00" { why = "no newline at end of file"; goto catch; } - "\x00" { return 1; } - "\n" => line { line++; continue; } - * :=> skip - - /* Books. */ - "Genesis" / lookat => book { book = Genesis; continue; } - "Exodus" / lookat => book { book = Exodus; continue; } - "Leviticus" / lookat => book { book = Leviticus; continue; } - "Numbers" / lookat => book { book = Numbers; continue; } - "Deuteronomy" / lookat => book { book = Deuteronomy; continue; } - "Joshua" / lookat => book { book = Joshua; continue; } - "Judges" / lookat => book { book = Judges; continue; } - "Ruth" / lookat => book { book = Ruth; continue; } - first "Samuel" / lookat => book { book = ISamuel; continue; } - second "Samuel" / lookat => book { book = IISamuel; continue; } - first "Kings" / lookat => book { book = IKings; continue; } - second "Kings" / lookat => book { book = IIKings; continue; } - first "Chronicles" / lookat - => book { book = IChronicles; continue; } - second "Chronicles" / lookat - => book { book = IIChronicles; continue; } - "Ezra" / lookat => book { book = Ezra; continue; } - "Nehemiah" / lookat => book { book = Nehemiah; continue; } - "Esther" / lookat => book { book = Esther; continue; } - "Job" / lookat => book { book = Job; continue; } - "Psalms" / lookat => book { book = Psalms; continue; } - "Proverbs" / lookat => book { book = Proverbs; continue; } - "Ecclesiastes" / lookat - => book { book = Ecclesiastes; continue; } - "Song of Solomon" / lookat - => book { book = Song_of_Solomon; continue; } - "Isaiah" / lookat => book { book = Isaiah; continue; } - "Jeremiah" / lookat => book { book = Jeremiah; continue; } - "Lamentations" / lookat - => book { book = Lamentations; continue; } - "Ezekiel" / lookat => book { book = Ezekiel; continue; } - "Daniel" / lookat => book { book = Daniel; continue; } - "Hosea" / lookat => book { book = Hosea; continue; } - "Joel" / lookat => book { book = Joel; continue; } - "Amos" / lookat => book { book = Amos; continue; } - "Obadiah" / lookat => book { book = Obadiah; continue; } - "Jonah" / lookat => book { book = Jonah; continue; } - "Micah" / lookat => book { book = Micah; continue; } - "Nahum" / lookat => book { book = Nahum; continue; } - "Habakkuk" / lookat => book { book = Habakkuk; continue; } - "Zephaniah" / lookat => book { book = Zephaniah; continue; } - "Haggai" / lookat => book { book = Haggai; continue; } - "Zechariah" / lookat => book { book = Zechariah; continue; } - "Malachi" / lookat => book { book = Malachi; continue; } - "Matthew" / lookat => book { book = Matthew; continue; } - "Mark" / lookat => book { book = Mark; continue; } - "Luke" / lookat => book { book = Luke; continue; } - "John" / lookat => book { book = John; continue; } - "Acts" / lookat => book { book = Acts; continue; } - "Romans" / lookat => book { book = Romans; continue; } - first "Corinthians" / lookat - => book { book = ICorinthians; continue; } - second "Corinthians" / lookat - => book { book = IICorinthians; continue; } - "Galatians" / lookat => book { book = Galatians; continue; } - "Ephesians" / lookat => book { book = Ephesians; continue; } - "Philippians" / lookat => book { book = Philippians; continue; } - "Colossians" / lookat => book { book = Colossians; continue; } - first "Thessalonians" / lookat - => book { book = IThessalonians; continue; } - second "Thessalonians" / lookat - => book { book = IIThessalonians; continue; } - first "Timothy" / lookat => book { book = ITimothy; continue; } - second "Timothy" / lookat => book { book = IITimothy; continue; } - "Titus" / lookat => book { book = Titus; continue; } - "Philemon" / lookat => book { book = Philemon; continue; } - "Hebrews" / lookat => book { book = Hebrews; continue; } - "James" / lookat => book { book = James; continue; } - first "Peter" / lookat => book { book = IPeter; continue; } - second "Peter" / lookat => book { book = IIPeter; continue; } - first "John" / lookat => book { book = IJohn; continue; } - second "John" / lookat => book { book = IIJohn; continue; } - third "John" / lookat => book { book = IIIJohn; continue; } - "Jude" / lookat => book { book = Jude; continue; } - "Revelation" / lookat => book { book = Revelation; continue; } - - /* Extract further information. */ - * { why = "default unrecognized"; goto catch; } - /* 19:15a, just ignore the a. */ - ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? { - if(chapter || verse || verse_end) - { why = "reference unrecognized"; goto catch; } - if(!pair_to_natural(s0, s1, &chapter) - || !pair_to_natural(t0, t1, &verse)) - { why = "reference numerical error"; goto catch; } - continue; - } - "-" @s0 natural @s1 [ab]? { /* Verse range. */ - if(!chapter || !verse || verse_end) - { why = "range unrecognized"; goto catch; } - if(!pair_to_natural(s0, s1, &verse_end)) - { why = "range numerical error"; goto catch; } - continue; - } - engage => skip { - if(!chapter || !verse) { why = "missing information"; goto catch; } - if(verse_end && verse_end <= verse) - { why = "interval error"; goto catch; } - union kjvcite cite - = { .book = book, .chapter = chapter, .verse = verse }; - if(!callback(kj, date, cite, verse_end)) - { why = "add to set"; goto catch; } - book = Revelation, chapter = 0, verse = 0, verse_end = 0; - continue; - } - */ } - assert(0); /* Never gets here. */ -catch: - if(!errno) errno = EILSEQ; - { - char datestr[12]; - date32_to_string(date, &datestr); - fprintf(stderr, "%s\n" - "%s line %zu: %s.\n", buffer, datestr, line, why); - } - return 0; -} - -static int add_to_set(struct kjv *const kj, const union line64 line, - union kjvcite cite, const uint32_t verse_end) { - char citestr[12], datestr[12]; - const struct source *src = source_lookup(&s, line); - assert(src); if(!src->name.a) { errno = EDOM; goto catch; } - kjvcite_to_string(cite, &citestr); - for( ; ; cite.verse++) { - if(!kjv_add(kj, cite)) return 0; - if(!verse_end || verse_end <= cite.verse) break; - } - date32_to_string(date, &datestr); - printf("%s\t%zu\t%zu\t%zu\t# ", - datestr, kj->words.verse, kj->words.set, kj->words.cumulative); - if(verse_end) { - printf("%s-%" PRIu32 "\n", citestr, verse_end); - } else { - printf("%s\n", citestr); - } - return 1; -} - -int main(void) { - int success = EXIT_SUCCESS; - const char *reason = "unknown"; - errno = 0; - struct journal j; - struct journal_iterator it; - struct kjv bible = kjv(); - if(kjv_is_empty(&bible)) { reason = "kjv failed to load"; goto catch; } - union date32 k; - const char *v; - j = journal(); - if(journal_is_empty(&j)) { reason = "journal failed to load"; goto catch; } - fprintf(stderr, "Journal: %s.\n", journal_to_string(&j)); - printf("set term postscript eps enhanced\n" - "set output \"kjv.eps\"\n" - "$Data < */ -#if defined GENERIC \ - || !defined BASE && !defined GENERIC && !defined PROTO /* */ +#endif /* private --> */ #if defined PROTO \ - || !defined BASE && !defined GENERIC && !defined PROTO /*