interpret/src/kjv.re.c

250 lines
7.7 KiB
C
Raw Normal View History

2022-12-27 02:31:08 -05:00
/** Run with a `KJV` sub-directory,
<https://github.com/scrollmapper/bible_databases/master/txt/KJV/>, outputs a
`gperf` file that has all the words of all the verses in `kjv.h` format.
@license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT). Uses the KJV at
[bible databases](https://github.com/scrollmapper/bible_databases/tree/master),
"All included Bible translations are in the public domain."
@std C11 */
#include "../src/text.h"
2022-12-27 23:45:57 -05:00
#define OMIT_VERSES
#define OMIT_PROTO
2022-12-27 15:28:04 -05:00
#include "../src/kjv.h" /* Just the base data. */
2022-12-27 23:45:57 -05:00
#include <inttypes.h>
2022-12-13 03:31:56 -05:00
#include <stdio.h>
2022-12-27 15:28:04 -05:00
#include <stdlib.h>
#include <errno.h>
2022-12-27 15:28:04 -05:00
#include <assert.h>
2022-12-13 03:31:56 -05:00
#include <dirent.h> /* opendir readdir closedir */
#include <unistd.h> /* chdir (POSIX) (because I'm lazy) */
2022-12-27 15:28:04 -05:00
/* #include <cmph.h> No; overkill. */
/* Reversible hash map. */
/** Mixes the bits of `x` with alternating xor-shifts and odd multiplications;
 <fn:lowbias32_r> is its inverse. The constants are those published at
 <https://nullprogram.com/blog/2018/07/31/> and
 <https://github.com/skeeto/hash-prospector>.
 @return The 32-bit hash of `x`. */
static uint32_t lowbias32(uint32_t x) {
	const uint32_t first = 0x7feb352dU, second = 0x846ca68bU;
	x = (x ^ (x >> 16)) * first;
	x = (x ^ (x >> 15)) * second;
	return x ^ (x >> 16);
}
/** Inverts the mixing of <fn:lowbias32>, applying the reversing xor-shifts and
 multipliers in the opposite order.
 @return The value whose hash is `x`. */
static uint32_t lowbias32_r(uint32_t x) {
	const uint32_t first = 0x43021123U, second = 0x1d69e2a5U;
	x = (x ^ (x >> 16)) * first;
	/* A 15-bit xor-shift needs two terms to be undone in 32 bits. */
	x = (x ^ (x >> 15) ^ (x >> 30)) * second;
	return x ^ (x >> 16);
}
2022-12-27 16:01:51 -05:00
/** @return The <fn:lowbias32> hash of the `u32` member of citation `x`. */
static uint32_t kjvset_hash(const union kjvcite x) {
	const uint32_t packed = x.u32;
	return lowbias32(packed);
}
static union kjvcite kjvset_unhash(const uint32_t x)
2022-12-27 15:28:04 -05:00
{ union kjvcite k; k.u32 = lowbias32_r(x); return k; }
2022-12-27 16:01:51 -05:00
static void kjvset_to_string(const union kjvcite x, char (*const a)[12])
2022-12-27 23:45:57 -05:00
{ sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, kjv_book_string[x.book],
2022-12-27 16:48:45 -05:00
x.chapter % 1000, x.verse % 1000); }
2022-12-27 16:01:51 -05:00
#define TABLE_NAME kjvset
2022-12-27 15:28:04 -05:00
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
#define TABLE_INVERSE
#define TABLE_TO_STRING
#include "../src/table.h"
/* Derived information on verse word count. */
2022-12-27 16:01:51 -05:00
/** The verse table hashes citations exactly as the set does.
 @return <fn:kjvset_hash> of `x`. */
static uint32_t verse_hash(const union kjvcite x) {
	return kjvset_hash(x);
}
/** The verse table recovers citations exactly as the set does.
 @return <fn:kjvset_unhash> of `x`. */
static union kjvcite verse_unhash(const uint32_t x) {
	return kjvset_unhash(x);
}
2022-12-27 15:28:04 -05:00
static void verse_to_string(const union kjvcite x, const unsigned count,
2022-12-27 16:01:51 -05:00
char (*const a)[12]) { (void)count; kjvset_to_string(x, a); }
2022-12-27 15:28:04 -05:00
#define TABLE_NAME verse
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
#define TABLE_VALUE unsigned /* Count words. */
#define TABLE_INVERSE
#define TABLE_DEFAULT 0
#define TABLE_TO_STRING
#include "../src/table.h"
2022-12-13 03:31:56 -05:00
/** Helper to parse the decimal digits in [`s`, `e`) as an unsigned => `n`.
 An empty range parses as zero.
 @param[out] n Receives the value on success; untouched on failure.
 @return Success. On failure `errno` is `ERANGE` if the value does not fit in
 an `unsigned`, or `EILSEQ` if a character is not a decimal digit. */
static int parse_natural(const char *s, const char *const e, unsigned *const n) {
	const unsigned limit = (unsigned)-1; /* Largest `unsigned`. */
	unsigned accum = 0;
	for( ; s < e; s++) {
		/* A character below '0' wraps to a large value, so one test does. */
		const unsigned digit = (unsigned)(*s - '0');
		if(digit > 9) return errno = EILSEQ, 0;
		/* Test before multiplying: a wrapped product can still exceed the
		 previous value, so comparing afterwards misses overflows. */
		if(accum > (limit - digit) / 10) return errno = ERANGE, 0;
		accum = accum * 10 + digit;
	}
	*n = accum;
	return 1;
}
2022-12-13 03:31:56 -05:00
2022-12-27 02:31:08 -05:00
/* Parse filename of books. */
/*!re2c /**/
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
natural = [1-9][0-9]*;
2022-12-13 00:25:28 -05:00
whitespace = [ \t\v\f];
word = [^ \t\v\f\n\x00]+;
*/
2022-12-13 00:25:28 -05:00
/** `fn` contains "<number>[*].txt", sticks that in `book_no`, otherwise
returns false. */
2022-12-27 02:31:08 -05:00
static int looks_like_book_fn(const char *fn, unsigned *const book_no) {
const char *YYCURSOR = fn, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
assert(fn && book_no);
/*!re2c /**/
*
{ return 0; }
2022-12-13 00:25:28 -05:00
@s0 natural @s1 [^.\x00]* ".txt" "\x00"
{ return parse_natural(s0, s1, book_no); }
*/
}
2022-12-13 00:25:28 -05:00
2022-12-27 02:31:08 -05:00
/* Scanner for the contents of the files that <fn:looks_like_book_fn> accepts. */
2022-12-13 03:31:56 -05:00
/** Scanner state over a null-terminated text. */
struct lex {
	size_t line;        /* Line number; the first line is 1. */
	const char *cursor; /* Next character to be read. */
	int error;          /* Non-zero flags a failure. */
	unsigned chapter;   /* Chapter number of the verse read. */
	unsigned verse;     /* Verse number of the verse read. */
	unsigned words;     /* Word count of the verse read. */
};
/** @return A scanner positioned at `cursor`, which must not be null, on line
 one, with no error and all counts zero. */
static struct lex lex(const char *cursor) {
	assert(cursor);
	return (struct lex){ .line = 1, .cursor = cursor, .error = 0,
		.chapter = 0, .verse = 0, .words = 0 };
}
2022-12-13 00:25:28 -05:00
/*!conditions:re2c*/
2022-12-13 03:31:56 -05:00
static int lex_next_verse(struct lex *const lex) {
const char *YYMARKER, *yyt1 = 0, *yyt2 = 0, *s0, *s1, *t0, *t1;
enum YYCONDTYPE condition = yycline;
2022-12-13 00:25:28 -05:00
/*!re2c /**/
2022-12-13 03:31:56 -05:00
re2c:define:YYCURSOR = lex->cursor;
re2c:define:YYGETCONDITION = "condition";
re2c:define:YYSETCONDITION = "condition = @@;";
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION:naked = 1; */
assert(lex && lex->cursor);
lex->error = 0;
2022-12-13 00:25:28 -05:00
scan:
/*!re2c /**/
<*> * { return errno = EILSEQ, lex->error = 1, 0; }
<line> [^[\]\n\x00]* "\n" { lex->line++; goto scan; }
<line> "\x00" { return 0; }
2022-12-13 03:31:56 -05:00
<line> "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse {
if(!parse_natural(s0, s1, &lex->chapter)
|| !parse_natural(t0, t1, &lex->verse))
return errno = EILSEQ, lex->error = 1, 0;
lex->words = 0;
/*printf("%u:%u", lex->chapter, lex->verse);*/
2022-12-13 03:31:56 -05:00
goto scan;
2022-12-13 00:25:28 -05:00
}
2022-12-13 03:31:56 -05:00
<verse> whitespace+ { goto scan; }
<verse> @s0 word @s1 { lex->words++; goto scan; }
<verse> "\n" { /*printf(" -> %u\n", lex->words);*/ lex->line++; return 1; }
2022-12-13 00:25:28 -05:00
*/
}
2022-12-27 23:45:57 -05:00
#define OMIT_BASE
#define OMIT_VERSES
2022-12-27 15:28:04 -05:00
#include "../src/kjv.h" /* Just the kjv and prototypes. */
2022-12-14 00:05:29 -05:00
2022-12-27 16:48:45 -05:00
/** Frees `kjv`. */
2022-12-27 15:28:04 -05:00
void kjv_(struct kjv *const kjv) {
if(!kjv) return;
2022-12-27 16:01:51 -05:00
kjvset_table_(&kjv->set);
2022-12-27 15:28:04 -05:00
verse_table_(&kjv->verses);
}
2022-12-27 02:31:08 -05:00
2022-12-27 16:55:58 -05:00
/** Loads 66 files from the "kjv/" directory. Prints out something if it
doesn't work, but does not call `perror` or reset `errno`. Use
<fn:kjv_is_valid> to tell. */
2022-12-27 15:28:04 -05:00
struct kjv kjv(void) {
2022-12-27 02:49:57 -05:00
const char *const dir_kjv = "kjv";
2022-12-27 23:45:57 -05:00
struct text backing = text();
2022-12-27 15:28:04 -05:00
struct kjv kjv = { 0 };
2022-12-13 03:31:56 -05:00
DIR *dir = 0;
struct dirent *de = 0;
struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 };
enum kjv_book b = 0;
2022-12-27 15:28:04 -05:00
int attempted_closedir = 0;
2022-12-13 03:31:56 -05:00
/* For all files in directory KJV with <#>*.txt, read into backing. */
if(chdir(dir_kjv) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
2022-12-13 03:31:56 -05:00
unsigned ordinal;
2022-12-27 16:48:45 -05:00
char *unstable_backing;
2022-12-27 02:31:08 -05:00
if(!looks_like_book_fn(de->d_name, &ordinal)) continue;
/*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/
2022-12-13 03:31:56 -05:00
if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
{ errno = ERANGE; goto catch; } /* Not in range. */
if(build[b = ordinal - 1].is) /* Convert to zero-based. */
2022-12-27 02:31:08 -05:00
{ errno = EDOM; goto catch; } /* Is duplicate. */
2022-12-27 16:48:45 -05:00
if(!(unstable_backing = text_append_file(&backing, de->d_name)))
2022-12-27 02:31:08 -05:00
goto catch;
build[b].is = 1;
2022-12-27 16:48:45 -05:00
build[b].offset = (size_t)(unstable_backing - backing.a.data);
2022-12-13 03:31:56 -05:00
}
if(attempted_closedir = 1, closedir(dir) == -1) goto catch; dir = 0;
/* Now backing is stable; count all the words for each verse. */
for(b = 0; b < KJV_BOOK_SIZE; b++) {
struct lex x;
if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n",
b + 1, kjv_book_string[b]); errno = EDOM; goto catch; }
2022-12-27 15:28:04 -05:00
x = lex(backing.a.data + build[b].offset);
2022-12-14 00:05:29 -05:00
while(lex_next_verse(&x)) {
const union kjvcite cite
= { .book = b, .chapter = x.chapter, .verse = x.verse };
unsigned *words;
switch(verse_table_assign(&kjv.verses, cite, &words)) {
2022-12-14 00:05:29 -05:00
case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n",
b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM;
2022-12-14 00:05:29 -05:00
case TABLE_ERROR: goto catch;
case TABLE_ABSENT: break;
2022-12-14 00:05:29 -05:00
}
2022-12-27 16:01:51 -05:00
*words = x.words, kjv.total_words += x.words;
2022-12-14 00:05:29 -05:00
}
if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n",
b + 1, kjv_book_string[b], x.line); goto catch; }
2022-12-13 03:31:56 -05:00
}
goto finally;
catch:
if(de) fprintf(stderr, "While reading %s.\n", de->d_name);
if(dir && !attempted_closedir && closedir(dir) == -1) perror(dir_kjv);
2022-12-27 15:28:04 -05:00
kjv_(&kjv);
2022-12-13 03:31:56 -05:00
finally:
2022-12-27 15:28:04 -05:00
text_(&backing);
return kjv;
2022-12-13 03:31:56 -05:00
}
2022-12-27 15:28:04 -05:00
2022-12-27 16:48:45 -05:00
/** Has `kjv` loaded properly? If not, `errno` is probably set.
 @return True if `kjv` is non-null and its `verses` table has buckets. */
int kjv_is_valid(const struct kjv *const kjv) {
	if(!kjv) return 0;
	return kjv->verses.buckets != 0;
}
/** Adds `cite` to `kjv` if not present. @return Is the kjv still valid. */
2022-12-27 16:01:51 -05:00
int kjv_add(struct kjv *const kjv, const union kjvcite cite) {
if(!kjv) return 0;
switch(kjvset_table_try(&kjv->set, cite)) {
case TABLE_ERROR: return 0;
case TABLE_ABSENT: kjv->set_words += verse_table_get(&kjv->verses, cite);
case TABLE_PRESENT: break;
}
return 1;
2022-12-27 15:28:04 -05:00
}
2022-12-27 16:01:51 -05:00
const char *kjv_to_string(const struct kjv *const kjv)
2022-12-27 15:28:04 -05:00
{ return kjv ? verse_table_to_string(&kjv->verses) : ""; }
2022-12-27 16:01:51 -05:00
/** @return The string that `kjvset_table_to_string` gives for the `set` table
 of `kjv`, or the empty string if `kjv` is null. This matches
 <fn:kjv_to_string>, so the result can always be passed to a `%s` format. */
const char *kjv_set_to_string(const struct kjv *const kjv) {
	return kjv ? kjvset_table_to_string(&kjv->set) : "";
}