/** @license 2022 Neil Edelman, distributed under the terms of the [MIT License](https://opensource.org/licenses/MIT). Is intended to use . @std C13 */ #include #include #include #include #include /* opendir readdir closedir */ #include /* chdir (POSIX) (because I'm lazy) */ /* Dynamic contiguous string that is used to load files. */ #define ARRAY_NAME char #define ARRAY_TYPE char #include "../src/array.h" /** Append a text file, `fn`, to `c`, and add a '\0'. @return Success. A partial read is failure. @throws[fopen, fread, malloc] @throws[EISEQ] The text file has embedded nulls. @throws[ERANGE] If the standard library does not follow POSIX. */ static int append_file(struct char_array *c, const char *const fn) { FILE *fp = 0; const size_t granularity = 1024; size_t nread; char *cursor; int success = 0; assert(c && fn); if(!(fp = fopen(fn, "r"))) goto catch; /* Read entire file in chunks. */ do if(!(cursor = char_array_buffer(c, granularity)) || (nread = fread(cursor, 1, granularity, fp), ferror(fp)) || !char_array_append(c, nread)) goto catch; while(nread == granularity); /* File to `C` string. */ if(!(cursor = char_array_new(c))) goto catch; *cursor = '\0'; /* Binary files with embedded '\0' are not allowed. */ if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; } { success = 1; goto finally; } catch: if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */ finally: if(fp) fclose(fp); return success; } /** Helper to parse unsigned; [`s`,`e`) => `n`. */ static int parse_natural(const char *s, const char *const e, unsigned *const n) { unsigned accum = 0; while(s < e) { unsigned next = accum * 10 + (unsigned)(*s - '0'); if(accum >= next) return errno = ERANGE, 0; accum = next; s++; } *n = accum; return 1; } /* Enumerate books. 
*/
/* `BOOKS` is an X-macro: it applies `X` to each of the 66 books in order,
 (39 of the Old Testament, a blank continuation, then 27 of the New,) and
 finally to `KJV_BOOK_SIZE`. Defining `X` differently before each expansion
 keeps the enumeration and the name table in step. */
#define BOOKS \
	X(Genesis),\
	X(Exodus),\
	X(Leviticus),\
	X(Numbers),\
	X(Deuteronomy),\
	X(Joshua),\
	X(Judges),\
	X(Ruth),\
	X(ISamuel),\
	X(IISamuel),\
	X(IKings),\
	X(IIKings),\
	X(IChronicles),\
	X(IIChronicles),\
	X(Ezra),\
	X(Nehemiah),\
	X(Esther),\
	X(Job),\
	X(Psalms),\
	X(Proverbs),\
	X(Ecclesiastes),\
	X(Song_of_Solomon),\
	X(Isaiah),\
	X(Jeremiah),\
	X(Lamentations),\
	X(Ezekiel),\
	X(Daniel),\
	X(Hosea),\
	X(Joel),\
	X(Amos),\
	X(Obadiah),\
	X(Jonah),\
	X(Micah),\
	X(Nahum),\
	X(Habakkuk),\
	X(Zephaniah),\
	X(Haggai),\
	X(Zechariah),\
	X(Malachi),\
	\
	X(Matthew),\
	X(Mark),\
	X(Luke),\
	X(John),\
	X(Acts),\
	X(Romans),\
	X(ICorinthians),\
	X(IICorinthians),\
	X(Galatians),\
	X(Ephesians),\
	X(Philippians),\
	X(Colossians),\
	X(IThessalonians),\
	X(IIThessalonians),\
	X(ITimothy),\
	X(IITimothy),\
	X(Titus),\
	X(Philemon),\
	X(Hebrews),\
	X(James),\
	X(IPeter),\
	X(IIPeter),\
	X(IJohn),\
	X(IIJohn),\
	X(IIIJohn),\
	X(Jude),\
	X(Revelation),\
	X(KJV_BOOK_SIZE)
/* As identifiers: `Genesis` is 0 and the trailing `KJV_BOOK_SIZE` is therefore
 the number of books, 66. */
#define X(book) book
enum kjv_book { BOOKS };
#undef X
/* As strings: `kjv_book_string[b]` is the spelling of enumerator `b`. The
 table has one extra, final entry, "KJV_BOOK_SIZE". */
#define X(book) #book
static const char *kjv_book_string[] = { BOOKS };
#undef X
#undef BOOKS

/* Parse filename of books. The `re2c` block that follows has the global
 configuration, (`YYFILL` is disabled and the input is `char`,) and the named
 patterns `natural`, `whitespace`, and `word` that the lexers in this file
 refer to. */
/*!re2c /**/
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
natural = [1-9][0-9]*;
whitespace = [ \t\v\f];
word = [^ \t\v\f\n\x00]+;
*/

/** Matches `fn` against a natural number, then any characters that are not
 '.', then ".txt" at the very end of the string. The number at the start is
 the only part that is extracted.
 @param[fn] Null-terminated file name; must not be null.
 @param[book_no] Receives the leading number on success; must not be null.
 @return True if `fn` has that form and the number was parsed by
 `parse_natural`; false if the name does not match or the parse failed. */
static int kjv_filename(const char *fn, unsigned *const book_no) {
	/* `YYCURSOR`, `YYMARKER`, and `yyt*` are used by the code `re2c`
	 generates; `s0` and `s1` are the tags delimiting the number. */
	const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
	assert(fn && book_no);
	/*!re2c /**/
	* { return 0; }
	@s0 natural @s1 [^.\x00]* ".txt" "\x00" { return parse_natural(s0, s1, book_no); }
	*/
}

/* Parse book contents.
*/ struct lex { size_t line; const char *cursor; int error; unsigned chapter, verse, words; }; static struct lex lex(const char *cursor) { struct lex lex; assert(cursor); lex.line = 1; lex.cursor = cursor; lex.error = 0; lex.chapter = lex.verse = lex.words = 0; return lex; } /*!conditions:re2c*/ static int lex_next_verse(struct lex *const lex) { const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1; enum YYCONDTYPE condition = yycline; /*!re2c /**/ re2c:define:YYCURSOR = lex->cursor; re2c:define:YYGETCONDITION = "condition"; re2c:define:YYSETCONDITION = "condition = @@;"; re2c:define:YYGETCONDITION:naked = 1; re2c:define:YYSETCONDITION:naked = 1; */ assert(lex && lex->cursor); lex->error = 0; scan: /*!re2c /**/ <*> * { return errno = EILSEQ, lex->error = 1, 0; } [^[\]\n\x00]* "\n" { lex->line++; goto scan; } "\x00" { return 0; } "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse { if(!parse_natural(s0, s1, &lex->chapter) || !parse_natural(t0, t1, &lex->verse)) return errno = EILSEQ, lex->error = 1, 0; lex->words = 0; /*printf("%u:%u", lex->chapter, lex->verse);*/ goto scan; } whitespace+ { goto scan; } @s0 word @s1 { lex->words++; goto scan; } "\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; } */ } /* Reversible hash map to store data on bible. */ #include /** on `x`. */ static uint32_t lowbias32(uint32_t x) { x ^= x >> 16; x *= 0x7feb352dU; x ^= x >> 15; x *= 0x846ca68bU; x ^= x >> 16; return x; } /* Inverts `x`. 
*/ static uint32_t lowbias32_r(uint32_t x) { x ^= x >> 16; x *= 0x43021123U; x ^= x >> 15 ^ x >> 30; x *= 0x1d69e2a5U; x ^= x >> 16; return x; } union kjvcite { uint32_t u32; struct { unsigned verse : 8, chapter : 8, book : 7; }; }; static uint32_t kjv_hash(const union kjvcite x) { return lowbias32(x.u32); } static union kjvcite kjv_unhash(const uint32_t x) { union kjvcite k; k.u32 = lowbias32_r(x); return k; } static void kjv_to_string(const union kjvcite x, char (*const a)[12]) { sprintf(*a, "%.4s%u:%u", kjv_book_string[x.book], (x.chapter + 1) % 1000, (x.verse + 1) % 1000); } static uint32_t words_hash(const union kjvcite x) { return kjv_hash(x); } static union kjvcite words_unhash(const uint32_t x) { return kjv_unhash(x); } static void words_to_string(const union kjvcite x, char (*const a)[12]) { kjv_to_string(x, a); } #define TABLE_NAME words #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t #define TABLE_VALUE unsigned #define TABLE_DEFAULT 0 #define TABLE_INVERSE #define TABLE_TO_STRING #include "../src/table.h" static uint32_t kjvset_hash(const union kjvcite x) { return kjv_hash(x); } static union kjvcite kjvset_unhash(const uint32_t x) { return kjv_unhash(x); } static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) { kjv_to_string(x, a); } #define TABLE_NAME kjvset #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t #define TABLE_INVERSE #define TABLE_TO_STRING #include "../src/table.h" int main(void) { const char *const dir_name = "KJV"; struct char_array kjv[KJV_BOOK_SIZE] = { 0 }; struct words_table words = { 0 }; int success = EXIT_SUCCESS; DIR *dir = 0; struct dirent *de = 0; unsigned i; size_t cum_words = 0; errno = 0; /* Read in the kjv from all files. This is overkill, we don't need to keep all the data, just count. Maybe we'll do something else later? */ if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch; while((de = readdir(dir))) { /* For all files in directory. 
*/ unsigned ordinal; enum kjv_book b; if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */ { /*fprintf(stderr, "Ignored <%s>.\n", de->d_name);*/ continue; } /*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/ if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) { errno = ERANGE; goto catch; } /* Not in range. */ if(kjv[b = ordinal - 1].data) /* Convert to zero-based. */ { errno = EDOM; goto catch; } /* Duplicate. */ if(!append_file(kjv + b, de->d_name)) goto catch; } closedir(dir), de = 0, dir = 0; /* Parse number of words. */ for(i = 0; i < KJV_BOOK_SIZE; i++) { struct lex x = lex(kjv[i].data); if(!x.cursor) { fprintf(stderr, "Missing book [%u]%s.\n", i + 1, kjv_book_string[i]); errno = EDOM; goto catch; } printf("[%u]%s: cumulative %zu.\n", i + 1, kjv_book_string[i], cum_words); while(lex_next_verse(&x)) { const union kjvcite c = { .verse = x.verse, .chapter = x.chapter, .book = i }; unsigned *w; switch(words_table_assign(&words, c, &w)) { case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n", i + 1, kjv_book_string[i], x.chapter, x.verse); errno = EDOM; case TABLE_ERROR: goto catch; case TABLE_ABSENT: *w = x.words; break; } cum_words += x.words; } if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n", i + 1, kjv_book_string[i], x.line); goto catch; } } printf("words: %s\n", words_table_to_string(&words)); printf("kjv: %zu words\n", cum_words); { union kjvcite c; struct words_table_iterator it = words_table_begin(&words); unsigned *w; while(words_table_next(&it, &c, &w)) printf("%s %u:%u -> %u\n", kjv_book_string[c.book], c.chapter, c.verse, *w); c.u32 = 0; /* Unnecessary? */ c.book = Genesis, c.chapter = 1, c.verse = 1; printf("1:1:1 -> %u\n", words_table_get(&words, c)); } goto finally; catch: success = EXIT_FAILURE; perror(de ? de->d_name : dir_name); if(dir && closedir(dir)) perror(dir_name); finally: for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(kjv + i); return success; }