diff --git a/kjv/src/kjv.re_c.c b/kjv/src/kjv.re_c.c index e83c2a0..55b0107 100644 --- a/kjv/src/kjv.re_c.c +++ b/kjv/src/kjv.re_c.c @@ -19,33 +19,37 @@ #include "../src/array.h" /** Append a text file, `fn`, to `c`, and add a '\0'. - @return Success. A partial read is failure. @throws[fopen, fread, malloc] + @return The start of the appended file or null on error. A partial read is a + failure. @throws[fopen, fread, malloc] @throws[EISEQ] The text file has embedded nulls. @throws[ERANGE] If the standard library does not follow POSIX. */ -static int append_file(struct char_array *c, const char *const fn) { +static char *append_file(struct char_array *text, const char *const fn) { FILE *fp = 0; const size_t granularity = 1024; - size_t nread; + size_t nread, start; char *cursor; - int success = 0; - assert(c && fn); + int success = 1; + assert(text && fn); + start = text->size; if(!(fp = fopen(fn, "r"))) goto catch; /* Read entire file in chunks. */ - do if(!(cursor = char_array_buffer(c, granularity)) + do if(!(cursor = char_array_buffer(text, granularity)) || (nread = fread(cursor, 1, granularity, fp), ferror(fp)) - || !char_array_append(c, nread)) goto catch; + || !char_array_append(text, nread)) goto catch; while(nread == granularity); /* File to `C` string. */ - if(!(cursor = char_array_new(c))) goto catch; + if(!(cursor = char_array_new(text))) goto catch; *cursor = '\0'; - /* Binary files with embedded '\0' are not allowed. */ - if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; } - { success = 1; goto finally; } + /* Binary files with embedded '\0' are not allowed; check just this read. */ + if(strchr(text->data + start, '\0') != cursor) + { errno = EILSEQ; goto catch; } + goto finally; catch: if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */ + success = 0; finally: if(fp) fclose(fp); - return success; + return success ? text->data + start : 0; } @@ -245,12 +249,12 @@ static uint32_t lowbias32_r(uint32_t x) { return x; } +/** Two hash-tables use the same structure. */ union kjvcite { /* Overkill, but no initializing unused bits, 12 + 13 + 7 = 32. */ struct { unsigned verse : 12, chapter : 13, book : 7; }; uint32_t u32; }; - static uint32_t kjv_hash(const union kjvcite x) { return lowbias32(x.u32); } static union kjvcite kjv_unhash(const uint32_t x) { union kjvcite k; @@ -261,25 +265,25 @@ static void kjv_to_string(const union kjvcite x, char (*const a)[12]) { sprintf(*a, "%.4s%u:%u", kjv_book_string[x.book], (x.chapter + 1) % 1000, (x.verse + 1) % 1000); } -static uint32_t kjvword_hash(const union kjvcite x) { return kjv_hash(x); } -static union kjvcite kjvword_unhash(const uint32_t x) { return kjv_unhash(x); } -static void kjvword_to_string(const union kjvcite x, char (*const a)[12]) +/** Derived information on verse word count. */ +static uint32_t verse_hash(const union kjvcite x) { return kjv_hash(x); } +static union kjvcite verse_unhash(const uint32_t x) { return kjv_unhash(x); } +static void verse_to_string(const union kjvcite x, char (*const a)[12]) { kjv_to_string(x, a); } - -#define TABLE_NAME kjvword +#define TABLE_NAME verse #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t #define TABLE_VALUE unsigned -#define TABLE_DEFAULT 0 #define TABLE_INVERSE +#define TABLE_DEFAULT 0 #define TABLE_TO_STRING #include "../src/table.h" +/* A set of verses. */ static uint32_t kjvset_hash(const union kjvcite x) { return kjv_hash(x); } static union kjvcite kjvset_unhash(const uint32_t x) { return kjv_unhash(x); } static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) { kjv_to_string(x, a); } - #define TABLE_NAME kjvset #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t @@ -288,76 +292,78 @@ static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) #include "../src/table.h" int main(void) { - const char *const dir_name = "KJV"; - struct char_array kjv[KJV_BOOK_SIZE] = { 0 }; - struct kjvword_table words = { 0 }; - int success = EXIT_SUCCESS; + const char *const dir_kjv = "KJV"; + struct { + struct char_array backing; + struct verse_table verses; + size_t words; + } kjv = { 0 }; DIR *dir = 0; struct dirent *de = 0; - unsigned i; - size_t cum_words = 0; + struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 }; + enum kjv_book b = 0; + int success = EXIT_SUCCESS, attempted_closedir = 0; errno = 0; - /* Read in the kjv from all files. This is overkill, we don't need to keep - all the data, just count. Maybe we'll do something else later? */ - if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { /* For all files in directory. */ + /* For all files in directory KJV with <#>*.txt, read into backing. */ + if(chdir(dir_kjv) == -1 || !(dir = opendir("."))) goto catch; + while((de = readdir(dir))) { unsigned ordinal; - enum kjv_book b; - if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */ - { /*fprintf(stderr, "Ignored <%s>.\n", de->d_name);*/ continue; } + char *unstable_book; + if(!kjv_filename(de->d_name, &ordinal)) continue; /* Extract no. */ /*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/ if(ordinal < 1 || ordinal > KJV_BOOK_SIZE) { errno = ERANGE; goto catch; } /* Not in range. */ - if(kjv[b = ordinal - 1].data) /* Convert to zero-based. */ + if(build[b = ordinal - 1].is) /* Convert to zero-based. */ { errno = EDOM; goto catch; } /* Duplicate. */ - if(!append_file(kjv + b, de->d_name)) goto catch; + if(!(unstable_book = append_file(&kjv.backing, de->d_name))) goto catch; + build[b].is = 1; + build[b].offset = (size_t)(unstable_book - kjv.backing.data); } - closedir(dir), de = 0, dir = 0; + if(attempted_closedir = 1, closedir(dir) == -1) goto catch; dir = 0; - /* Parse number of words in each verse. */ - for(i = 0; i < KJV_BOOK_SIZE; i++) { - struct lex x = lex(kjv[i].data); - if(!x.cursor) { fprintf(stderr, "Missing book [%u]%s.\n", - i + 1, kjv_book_string[i]); errno = EDOM; goto catch; } - /*printf("[%u]%s: cumulative %zu.\n", - i + 1, kjv_book_string[i], cum_words);*/ + /* Now backing is stable; count all the words for each verse. */ + for(b = 0; b < KJV_BOOK_SIZE; b++) { + struct lex x; + if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n", + b + 1, kjv_book_string[b]); errno = EDOM; goto catch; } + x = lex(kjv.backing.data + build[b].offset); while(lex_next_verse(&x)) { - const union kjvcite c - = { .verse = x.verse, .chapter = x.chapter, .book = i }; - unsigned *w; - switch(kjvword_table_assign(&words, c, &w)) { + const union kjvcite cite + = { .book = b, .chapter = x.chapter, .verse = x.verse }; + unsigned *words; + switch(verse_table_assign(&kjv.verses, cite, &words)) { case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n", - i + 1, kjv_book_string[i], x.chapter, x.verse); errno = EDOM; + b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM; case TABLE_ERROR: goto catch; - case TABLE_ABSENT: *w = x.words; break; + case TABLE_ABSENT: break; } - cum_words += x.words; + *words = x.words, kjv.words += x.words; } if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n", - i + 1, kjv_book_string[i], x.line); goto catch; } + b + 1, kjv_book_string[b], x.line); goto catch; } } - printf("words: %s\n", kjvword_table_to_string(&words)); - printf("kjv: %zu total words\n", cum_words); + printf("words: %s\n", verse_table_to_string(&kjv.verses)); + printf("kjv: %zu total words\n", kjv.words); { union kjvcite c; - struct kjvword_table_iterator it = kjvword_table_begin(&words); + struct verse_table_iterator it = verse_table_begin(&kjv.verses); unsigned *w; - while(kjvword_table_next(&it, &c, &w)) + while(verse_table_next(&it, &c, &w)) printf("%s %u:%u -> %u\n", kjv_book_string[c.book], c.chapter, c.verse, *w); c = (union kjvcite){ .book = Genesis, .chapter = 1, .verse = 1 }; - printf("1:1:1 -> %u\n", kjvword_table_get(&words, c)); + printf("1:1:1 -> %u\n", verse_table_get(&kjv.verses, c)); } goto finally; catch: success = EXIT_FAILURE; - perror(de ? de->d_name : dir_name); - if(dir && closedir(dir)) perror(dir_name); + if(de) fprintf(stderr, "While reading %s.\n", de->d_name); + perror(de ? de->d_name : dir_kjv); + if(dir && !attempted_closedir && closedir(dir) == -1) perror(dir_kjv); finally: - /*kjvset_table_();*/ - kjvword_table_(&words); - for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(kjv + i); + verse_table_(&kjv.verses); + char_array_(&kjv.backing); return success; }