/** Run with a `kjv` sub-directory. Two functionaries: counts all words from verses dynamically and puts them into an unchanging `kjvcount_table` on initialisation. Then has a set to each of the verses, `kjvset`, which starts off empty. @license 2022 Neil Edelman, distributed under the terms of the [MIT License](https://opensource.org/licenses/MIT). Uses the KJV at [bible databases](https://github.com/scrollmapper/bible_databases/tree/master), "All included Bible translations are in the public domain." @std C11 */ #define BASE #include "../src/kjvcount.h" #include "../src/text.h" #include "../src/pair.h" #include #include #include #include #include /* opendir readdir closedir */ #include /* chdir (POSIX) (because I'm lazy) */ /* #include No; overkill. */ void kjvcite_to_string(const union kjvcite x, char (*const a)[12]) { sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, kjv_book_string[x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE], x.chapter % 1000, x.verse % 1000); } /* Reversible hash map. */ /** on `x`. */ static uint32_t lowbias32(uint32_t x) { x ^= x >> 16; x *= 0x7feb352dU; x ^= x >> 15; x *= 0x846ca68bU; x ^= x >> 16; return x; } /* Inverts `x`. */ static uint32_t lowbias32_r(uint32_t x) { x ^= x >> 16; x *= 0x43021123U; x ^= x >> 15 ^ x >> 30; x *= 0x1d69e2a5U; x ^= x >> 16; return x; } /* Set of verses. */ static uint32_t kjvset_hash(const union kjvcite x) { return lowbias32(x.u32); } static union kjvcite kjvset_unhash(const uint32_t x) { union kjvcite k; k.u32 = lowbias32_r(x); return k; } static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) { kjvcite_to_string(x, a); } #define TABLE_NAME kjvset #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t #define TABLE_UNHASH #define TABLE_TO_STRING #include "../src/table.h" /* Derived information on verse word count. 
*/

/** @return The hash of `x` for the `count` table; the same hash that the
 `kjvset` table uses. */
static uint32_t count_hash(const union kjvcite x) { return kjvset_hash(x); }

/** @return The citation whose hash is `x`; delegates to `kjvset_unhash`. */
static union kjvcite count_unhash(const uint32_t x)
	{ return kjvset_unhash(x); }

/** Prints the key `x` to `a`. The value `count` is deliberately not part of
 the output. */
static void count_to_string(const union kjvcite x, const unsigned count,
	char (*const a)[12]) { (void)count; kjvset_to_string(x, a); }

/* Instantiates the `count` table from `table.h`, mapping a citation to an
 `unsigned`. The order of these definitions relative to the include matters. */
#define TABLE_NAME count
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
#define TABLE_VALUE unsigned /* Count words. */
#define TABLE_UNHASH
#define TABLE_DEFAULT 0
#define TABLE_TO_STRING
#include "../src/table.h"

/* Parse filename of books. */

/* Shared re2c configuration and named definitions. This is input to re2c, not
 C: `natural` is a decimal number without leading zeros, `whitespace` is a
 single blank other than newline, and `word` is a run of anything that is not
 whitespace, newline, or the terminating null. */
/*!re2c /**/
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
natural = [1-9][0-9]*;
whitespace = [ \t\v\f];
word = [^ \t\v\f\n\x00]+;
*/

/** Tests whether the null-terminated `fn` is a `natural`, followed by any
 characters that are not `.`, followed by ".txt" and the end of the string.
 If so, the leading number is converted by `pair_to_natural` into `book_no`
 and that call's result is returned; presumably it is false when the number
 does not fit. Any other `fn` returns false and leaves `book_no` alone.
 @param[fn] File name to test; must not be null.
 @param[book_no] Receives the number; must not be null. */
static int looks_like_book_fn(const char *fn, unsigned *const book_no) {
	/* `YYCURSOR`, `YYMARKER`, `yyt1` and `yyt2` are the names the code
	 generated by re2c works with; `s0` and `s1` are tags delimiting the
	 number. */
	const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
	assert(fn && book_no);
	/*!re2c /**/
	* { return 0; }
	@s0 natural @s1 [^.\x00]* ".txt" "\x00"
		{ return pair_to_natural(s0, s1, book_no); }
	*/
}

/* Lexer state for scanning the contents of one book file.
*/
struct lex {
	size_t line;        /* Line number; starts at 1, one more per newline. */
	const char *cursor; /* Where scanning resumes in the text. */
	int error;          /* Set to 1 when the last scan hit invalid input. */
	uint32_t chapter, verse, words; /* The verse most recently read. */
};

/** @return A lexer positioned at the start of the null-terminated text
 `cursor`, on line 1, with no error and all counts zero.
 @param[cursor] Must not be null. */
static struct lex lex(const char *cursor) {
	struct lex lex;
	assert(cursor);
	lex.line = 1;
	lex.cursor = cursor;
	lex.error = 0;
	lex.chapter = lex.verse = lex.words = 0;
	return lex;
}

/* re2c directive; presumably the place where re2c emits `enum YYCONDTYPE`,
 which is used below. */
/*!conditions:re2c*/

/** Scans forward in `lex` to the end of the next verse line. A verse line
 contains `[chapter:verse]`, after which words are counted up to the newline.
 @param[lex] Must not be null and must have a non-null `cursor`.
 @return One if a verse was read, in which case `chapter`, `verse` and `words`
 in `lex` describe it. Zero at the end of the text, or on input that matches
 no rule or numbers that `pair_to_natural` rejects; in those two error cases
 `lex->error` is 1 and `errno` is `EILSEQ`. */
static int lex_next_verse(struct lex *const lex) {
	const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1;
	/* Every call starts over in the `line` condition. */
	enum YYCONDTYPE condition = yycline;
	/*!re2c /**/
	re2c:define:YYCURSOR = lex->cursor;
	re2c:define:YYGETCONDITION = "condition";
	re2c:define:YYSETCONDITION = "condition = @@;";
	re2c:define:YYGETCONDITION:naked = 1;
	re2c:define:YYSETCONDITION:naked = 1;
	*/
	assert(lex && lex->cursor);
	lex->error = 0;
	/* NOTE(review): `condition` starts at `yycline` and one rule switches
	 with `=> verse`, yet only the first rule below carries a condition
	 prefix. The per-rule prefixes for the `line` and `verse` conditions look
	 to have been lost from this copy; confirm against the original re2c
	 source before regenerating the lexer. */
scan:
	/*!re2c /**/
	<*> * { return errno = EILSEQ, lex->error = 1, 0; }
	[^[\]\n\x00]* "\n" { lex->line++; goto scan; }
	"\x00" { return 0; }
	"[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse {
		if(!pair_to_natural(s0, s1, &lex->chapter)
			|| !pair_to_natural(t0, t1, &lex->verse))
			return errno = EILSEQ, lex->error = 1, 0;
		lex->words = 0;
		/*printf("%u:%u", lex->chapter, lex->verse);*/
		goto scan;
	}
	whitespace+ { goto scan; }
	@s0 word @s1 { lex->words++; goto scan; }
	"\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; }
	*/
}

#define PROTO
#include "../src/kjvcount.h" /* Just the kjv and prototypes. */

/** Frees `count`: releases the verse table and zeroes the word totals. A null
 `count` is ignored. */
void kjvcount_(struct kjvcount *const count) {
	if(!count) return;
	count_table_(&count->verses);
	count->words.total = count->words.cumulative = count->words.set = 0;
}

/** Loads 66 files from the "kjv/" directory. Prints out something if it
 doesn't work, but does not call `perror` or reset `errno`. Use
 `kjvcount_is_empty` on the result to tell.

 The process changes its working directory to "kjv" while reading and back to
 ".." before returning.
 @return The word counts for every verse read, or, on failure, a value that
 has been passed through `kjvcount_`. */
struct kjvcount kjvcount(void) {
	const char *const dir_kjv = "kjv";
	struct text backing = text(); /* Holds the text of all the books. */
	struct kjvcount count = {0};
	DIR *dir = 0;
	struct dirent *de = 0;
	/* Per book: whether its file has been seen and where its text starts in
	 `backing`. An offset is kept instead of a pointer; presumably appending
	 further files may move `backing.a.data`. */
	struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 };
	enum kjv_book b = 0;
	int is_in_kjv = 0; /* Set once `chdir` succeeded, so it can be undone. */

	/* For all files in directory KJV with <#>*.txt, read into backing. */
	if(chdir(dir_kjv) == -1
		|| (is_in_kjv = 1, !(dir = opendir(".")))) goto catch;
	while((de = readdir(dir))) {
		unsigned ordinal;
		char *unstable_backing;
		/* Files that do not look like a book are skipped silently. */
		if(!looks_like_book_fn(de->d_name, &ordinal)) continue;
		/*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/
		if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
			{ errno = ERANGE; goto catch; } /* Not in range. */
		if(build[b = ordinal - 1].is) /* Convert to zero-based. */
			{ errno = EDOM; goto catch; } /* Is duplicate. */
		if(!(unstable_backing = text_append_file(&backing, de->d_name)))
			goto catch;
		build[b].is = 1;
		build[b].offset = (size_t)(unstable_backing - backing.a.data);
	}
	/* `dir` is cleared either way so that `finally` does not close it a
	 second time. */
	if(closedir(dir) == -1) { dir = 0; goto catch; } dir = 0;

	/* Now backing is stable; count all the words for each verse. */
	for(b = 0; b < KJV_BOOK_SIZE; b++) {
		struct lex x;
		if(!build[b].is) {
			fprintf(stderr, "Missing book [%u]%s.\n",
				b + 1, kjv_book_string[b]);
			errno = EDOM;
			goto catch;
		}
		x = lex(backing.a.data + build[b].offset);
		while(lex_next_verse(&x)) {
			const union kjvcite cite
				= { .book = b, .chapter = x.chapter, .verse = x.verse };
			unsigned *words;
			switch(count_table_assign(&count.verses, cite, &words)) {
			case TABLE_PRESENT:
				fprintf(stderr, "[%u]%s %u:%u duplicated.\n",
					b + 1, kjv_book_string[b], x.chapter, x.verse);
				errno = EDOM;
				/* Falls through on purpose: a duplicate is an error too. */
			case TABLE_ERROR: goto catch;
			case TABLE_ABSENT: break;
			}
			*words = x.words, count.words.total += x.words;
		}
		/* The loop above also stops on a lexing error; tell that apart from
		 the end of the book. */
		if(x.error) {
			fprintf(stderr, "[%u]%s on line %zu\n",
				b + 1, kjv_book_string[b], x.line);
			goto catch;
		}
	}
	goto finally;
catch:
	/* `de` is non-null only when the failure happened while a directory
	 entry was being processed. */
	if(de) fprintf(stderr, "While reading %s/%s.\n", dir_kjv, de->d_name);
	else fprintf(stderr, "In directory %s/.\n", dir_kjv);
recatch:
	/* Entered again, without the message, if the clean-up below fails. */
	kjvcount_(&count);
finally:
	if(dir) {
		if(closedir(dir)) { dir = 0; goto recatch; }
		dir = 0;
	}
	/* The flag is cleared before the test so a failure cannot loop. */
	if(is_in_kjv && (is_in_kjv = 0, chdir("..") == -1)) goto recatch;
	text_(&backing);
	return count;
}

/** @return True if `kjv` is null or holds no table, that is, it has not
 loaded properly; in that case `errno` is probably set.
*/ int kjvcount_is_empty(const struct kjvcount *const kjv) { return !kjv || !kjv->verses.buckets; } const char *kjvcount_to_string(const struct kjvcount *const count) { return count ? count_table_to_string(&count->verses) : ""; } struct kjvset_table kjv_set(void) { return kjvset_table(); } void kjv_set_(struct kjvset_table *const set) { kjvset_table_(set); } const char *kjv_set_to_string(const struct kjvset_table *const set) { return set ? kjvset_table_to_string(set) : 0; } /** Adds `cite` to `kjv` if not present. Only used in test. @return Is the kjv still valid. */ int kjv_set_add(struct kjvset_table *const set, struct kjvcount *const count, const union kjvcite cite) { size_t no_verse; if(!set || !count) return 0; no_verse = count_table_get(&count->verses, cite); count->words.cumulative += no_verse; switch(kjvset_table_try(set, cite)) { case TABLE_ERROR: return 0; case TABLE_ABSENT: count->words.set += no_verse; /* Sic. */ case TABLE_PRESENT: break; } return 1; }