/** Run with a `kjv` sub-directory. This has two functions. First, it counts
 all the words in the verses of
 <https://github.com/scrollmapper/bible_databases/master/txt/KJV/> dynamically
 and puts them into an unchanging `kjvcount_table` on initialisation. Second, it
 provides a set of verses, `kjvset`, which starts off empty.
 @license 2022 Neil Edelman, distributed under the terms of the
 [MIT License](https://opensource.org/licenses/MIT). Uses the KJV at
 [bible databases](https://github.com/scrollmapper/bible_databases/tree/master),
 "All included Bible translations are in the public domain."
 @std C11 */
|
|
|
|
#include "../src/kjv.h"
|
|
#include "../src/pair.h"
|
|
#include <inttypes.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <dirent.h> /* opendir readdir closedir */
|
|
#include <unistd.h> /* chdir (POSIX) (because I'm lazy) */
|
|
|
|
|
|
void kjvcite_to_string(const union kjvcite x, char (*const a)[12])
|
|
{ sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32,
|
|
kjv_book_string[x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE],
|
|
x.chapter % 1000, x.verse % 1000); }
|
|
|
|
/* Reversible hash map. */
|
|
/** Integer hash of `x` built from xor-shift and multiply rounds; see
 <https://nullprogram.com/blog/2018/07/31/> and
 <https://github.com/skeeto/hash-prospector>.
 @return The mixed 32-bit value. */
static uint32_t lowbias32(uint32_t x) {
	const uint32_t first_multiplier = 0x7feb352dU,
		second_multiplier = 0x846ca68bU;
	x = (x ^ (x >> 16)) * first_multiplier;
	x = (x ^ (x >> 15)) * second_multiplier;
	return x ^ (x >> 16);
}
|
|
/** Counterpart of <fn:lowbias32> that is meant to undo it: the same kind of
 xor-shift and multiply rounds, with different multipliers (presumably the
 modular inverses of the forward ones, applied in reverse order.)
 @return The value which, by the file's intent, hashes to `x`. */
static uint32_t lowbias32_r(uint32_t x) {
	const uint32_t first_multiplier = 0x43021123U,
		second_multiplier = 0x1d69e2a5U;
	x = (x ^ (x >> 16)) * first_multiplier;
	/* A 15-bit xor-shift needs two terms to cover all 32 bits. */
	x = (x ^ (x >> 15) ^ (x >> 30)) * second_multiplier;
	return x ^ (x >> 16);
}
|
|
|
|
/* Set of verses. */
|
|
/** @return The hash of the citation `x`, which is <fn:lowbias32> applied to
 its `u32` member. */
static uint32_t kjvset_hash(const union kjvcite x) {
	const uint32_t packed = x.u32;
	return lowbias32(packed);
}
|
|
static union kjvcite kjvset_unhash(const uint32_t x)
|
|
{ union kjvcite k; k.u32 = lowbias32_r(x); return k; }
|
|
/** Writes the citation `x` to the buffer `a` by forwarding to
 <fn:kjvcite_to_string>. */
static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) {
	kjvcite_to_string(x, a);
}
|
|
/* Parameters for the inclusion of `table.h` that follows. The identifiers used
 later in this file, such as `kjvset_table` and `kjvset_table_try`, carry the
 `kjvset` name given here. `TABLE_UNHASH` and `TABLE_TO_STRING` presumably make
 the header use `kjvset_unhash` and `kjvset_to_string` defined above; confirm
 against `table.h`. */
#define TABLE_NAME kjvset
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
#define TABLE_UNHASH
#define TABLE_TO_STRING
#define TABLE_BODY
#include "../src/table.h"
|
|
|
|
/* Derived information on verse word count. */
|
|
/** @return The hash of the citation `x`; identical to <fn:kjvset_hash>. */
static uint32_t kjvcount_hash(const union kjvcite x) {
	return kjvset_hash(x);
}
|
|
/** @return The citation recovered from the hash `x`; identical to
 <fn:kjvset_unhash>. */
static union kjvcite kjvcount_unhash(const uint32_t x) {
	return kjvset_unhash(x);
}
|
|
static void kjvcount_to_string(const union kjvcite x, const unsigned count,
|
|
char (*const a)[12]) { (void)count; kjvcite_to_string(x, a); }
|
|
/* Parameters for the inclusion of `table.h` that follows. The identifiers used
 later in this file, such as `kjvcount_table`, `kjvcount_table_assign` and
 `kjvcount_table_get`, carry the `kjvcount` name given here. Each citation key
 is paired with an `unsigned` word count. `TABLE_DEFAULT` is presumably the
 value reported for a key that is not present; confirm against `table.h`. */
#define TABLE_NAME kjvcount
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
#define TABLE_VALUE unsigned /* Count words. */
#define TABLE_UNHASH
#define TABLE_DEFAULT 0
#define TABLE_TO_STRING
#define TABLE_BODY
#include "../src/table.h"
|
|
|
|
|
|
/* Parse filename of books. */
|
|
/*!re2c /**/
|
|
re2c:yyfill:enable = 0;
|
|
re2c:define:YYCTYPE = char;
|
|
natural = [1-9][0-9]*;
|
|
whitespace = [ \t\v\f];
|
|
word = [^ \t\v\f\n\x00]+;
|
|
*/
|
|
/** `fn` contains "<number>[*].txt", sticks that in `book_no`, otherwise
|
|
returns false. */
|
|
static int looks_like_book_fn(const char *fn, unsigned *const book_no) {
|
|
const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
|
|
assert(fn && book_no);
|
|
/*!re2c /**/
|
|
*
|
|
{ return 0; }
|
|
@s0 natural @s1 [^.\x00]* ".txt" "\x00"
|
|
{ return pair_to_natural(s0, s1, book_no); }
|
|
*/
|
|
}
|
|
|
|
|
|
/* This is the contents of the <fn:looks_like_book_fn>. */
|
|
/** State of the scanner that walks the text of one book, verse by verse. */
struct lex {
	/* Current line number, advanced at every newline that is consumed. */
	size_t line;
	/* Position in the null-terminated text being scanned. */
	const char *cursor;
	/* Non-zero when the last scan stopped on input that it did not accept. */
	int error;
	/* Citation of the most recent verse within the book. */
	uint32_t chapter;
	uint32_t verse;
	/* Number of words counted in that verse. */
	uint32_t words;
};
|
|
static struct lex lex(const char *cursor) {
|
|
struct lex lex;
|
|
assert(cursor);
|
|
lex.line = 1;
|
|
lex.cursor = cursor;
|
|
lex.error = 0;
|
|
lex.chapter = lex.verse = lex.words = 0;
|
|
return lex;
|
|
}
|
|
/*!conditions:re2c*/
|
|
static int lex_next_verse(struct lex *const lex) {
|
|
const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1;
|
|
enum YYCONDTYPE condition = yycline;
|
|
/*!re2c /**/
|
|
re2c:define:YYCURSOR = lex->cursor;
|
|
re2c:define:YYGETCONDITION = "condition";
|
|
re2c:define:YYSETCONDITION = "condition = @@;";
|
|
re2c:define:YYGETCONDITION:naked = 1;
|
|
re2c:define:YYSETCONDITION:naked = 1; */
|
|
assert(lex && lex->cursor);
|
|
lex->error = 0;
|
|
scan:
|
|
/*!re2c /**/
|
|
<*> * { return errno = EILSEQ, lex->error = 1, 0; }
|
|
<line> [^[\]\n\x00]* "\n" { lex->line++; goto scan; }
|
|
<line> "\x00" { return 0; }
|
|
<line> "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse {
|
|
if(!pair_to_natural(s0, s1, &lex->chapter)
|
|
|| !pair_to_natural(t0, t1, &lex->verse))
|
|
return errno = EILSEQ, lex->error = 1, 0;
|
|
lex->words = 0;
|
|
/*printf("%u:%u", lex->chapter, lex->verse);*/
|
|
goto scan;
|
|
}
|
|
<verse> whitespace+ { goto scan; }
|
|
<verse> @s0 word @s1 { lex->words++; goto scan; }
|
|
<verse> "\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; }
|
|
*/
|
|
}
|
|
|
|
/* KJV count -- loaded up on initialization and is static for its lifetime.
 Given the verse citation, how many words does it have? */
|
|
/** Releases the resources held by `count` by handing it to the table's own
 destructor, `kjvcount_table_`. */
void kjv_count_(struct kjvcount_table *const count) {
	kjvcount_table_(count);
}
|
|
/** Loads 66 files from the "kjv/" directory and counts all the words, which
|
|
are stored in `total`. `total` is zero if an error occurred, in which case the
|
|
details are sent to `stderr` and `errno` is set. @return On success, a
|
|
`kjvcount_table` that maps citations to word count. */
|
|
struct kjvcount_table kjv_count(size_t *const total) {
|
|
const char *const dir_kjv = "kjv";
|
|
struct char_array backing = text();
|
|
struct kjvcount_table count = kjvcount_table();
|
|
DIR *dir = 0;
|
|
struct dirent *de = 0;
|
|
struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 };
|
|
enum kjv_book b = 0;
|
|
int is_in_kjv = 0;
|
|
|
|
assert(total);
|
|
*total = 0;
|
|
/* For all files in directory KJV with <#>*.txt, read into backing. */
|
|
if(chdir(dir_kjv) == -1 || (is_in_kjv = 1, !(dir = opendir("."))))
|
|
goto catch;
|
|
while((de = readdir(dir))) {
|
|
unsigned ordinal;
|
|
char *unstable_backing;
|
|
if(!looks_like_book_fn(de->d_name, &ordinal)) continue;
|
|
/*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/
|
|
if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
|
|
{ errno = ERANGE; goto catch; } /* Not in range. */
|
|
if(build[b = ordinal - 1].is) /* Convert to zero-based. */
|
|
{ errno = EDOM; goto catch; } /* Is duplicate. */
|
|
if(!(unstable_backing = text_append_file(&backing, de->d_name)))
|
|
goto catch;
|
|
build[b].is = 1;
|
|
build[b].offset = (size_t)(unstable_backing - backing.data);
|
|
}
|
|
if(closedir(dir) == -1) { dir = 0; goto catch; } dir = 0;
|
|
|
|
/* Now backing is stable; count all the words for each verse. */
|
|
for(b = 0; b < KJV_BOOK_SIZE; b++) {
|
|
struct lex x;
|
|
if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n",
|
|
b + 1, kjv_book_string[b]); errno = EDOM; goto catch; }
|
|
x = lex(backing.data + build[b].offset);
|
|
while(lex_next_verse(&x)) {
|
|
const union kjvcite cite
|
|
= { .book = b, .chapter = x.chapter, .verse = x.verse };
|
|
unsigned *words;
|
|
switch(kjvcount_table_assign(&count, cite, &words)) {
|
|
case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n",
|
|
b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM;
|
|
case TABLE_ERROR: goto catch;
|
|
case TABLE_ABSENT: break;
|
|
}
|
|
*words = x.words, *total += x.words;
|
|
}
|
|
if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n",
|
|
b + 1, kjv_book_string[b], x.line); goto catch; }
|
|
}
|
|
goto finally;
|
|
catch:
|
|
*total = 0;
|
|
if(de) fprintf(stderr, "While reading %s/%s.\n", dir_kjv, de->d_name);
|
|
else fprintf(stderr, "In directory %s/.\n", dir_kjv);
|
|
recatch:
|
|
kjv_count_(&count);
|
|
finally:
|
|
if(dir) { if(closedir(dir)) { dir = 0; goto recatch; } dir = 0; }
|
|
if(is_in_kjv && (is_in_kjv = 0, chdir("..") == -1)) goto recatch;
|
|
text_(&backing);
|
|
return count;
|
|
}
|
|
size_t kjv_count_get(struct kjvcount_table *const count,
|
|
const union kjvcite cite) { return kjvcount_table_get(count, cite); }
|
|
/** @return The string that `kjvcount_table_to_string` produces for `count`, or
 null if `count` is null, which matches the behaviour of
 <fn:kjv_set_to_string>. */
const char *kjv_count_to_string(const struct kjvcount_table *const count) {
	if(!count) return 0;
	return kjvcount_table_to_string(count);
}
|
|
|
|
/* KJV set -- keeps track of membership by verse. */
|
|
/** @return A new set of verses, as returned by `kjvset_table`. */
struct kjvset_table kjv_set(void) {
	return kjvset_table();
}
|
|
/** Releases the resources held by `set` by handing it to the table's own
 destructor, `kjvset_table_`. */
void kjv_set_(struct kjvset_table *const set) {
	kjvset_table_(set);
}
|
|
enum table_result kjv_set_add(struct kjvset_table *const set,
|
|
const union kjvcite cite) { return kjvset_table_try(set, cite); }
|
|
/** @return The string that `kjvset_table_to_string` produces for `set`, or
 null if `set` is null. */
const char *kjv_set_to_string(const struct kjvset_table *const set) {
	if(!set) return 0;
	return kjvset_table_to_string(set);
}
|