Instead of 66 objects, have 1 object. More confusing, but faster?
This commit is contained in:
parent
2807b5704d
commit
5d3e027f36
|
@ -19,33 +19,37 @@
|
||||||
#include "../src/array.h"
|
#include "../src/array.h"
|
||||||
|
|
||||||
/** Append a text file, `fn`, to `c`, and add a '\0'.
|
/** Append a text file, `fn`, to `c`, and add a '\0'.
|
||||||
@return Success. A partial read is failure. @throws[fopen, fread, malloc]
|
@return The start of the appended file or null on error. A partial read is a
|
||||||
|
failure. @throws[fopen, fread, malloc]
|
||||||
@throws[EILSEQ] The text file has embedded nulls.
|
@throws[EILSEQ] The text file has embedded nulls.
|
||||||
@throws[ERANGE] If the standard library does not follow POSIX. */
|
@throws[ERANGE] If the standard library does not follow POSIX. */
|
||||||
static int append_file(struct char_array *c, const char *const fn) {
|
static char *append_file(struct char_array *text, const char *const fn) {
|
||||||
FILE *fp = 0;
|
FILE *fp = 0;
|
||||||
const size_t granularity = 1024;
|
const size_t granularity = 1024;
|
||||||
size_t nread;
|
size_t nread, start;
|
||||||
char *cursor;
|
char *cursor;
|
||||||
int success = 0;
|
int success = 1;
|
||||||
assert(c && fn);
|
assert(text && fn);
|
||||||
|
start = text->size;
|
||||||
if(!(fp = fopen(fn, "r"))) goto catch;
|
if(!(fp = fopen(fn, "r"))) goto catch;
|
||||||
/* Read entire file in chunks. */
|
/* Read entire file in chunks. */
|
||||||
do if(!(cursor = char_array_buffer(c, granularity))
|
do if(!(cursor = char_array_buffer(text, granularity))
|
||||||
|| (nread = fread(cursor, 1, granularity, fp), ferror(fp))
|
|| (nread = fread(cursor, 1, granularity, fp), ferror(fp))
|
||||||
|| !char_array_append(c, nread)) goto catch;
|
|| !char_array_append(text, nread)) goto catch;
|
||||||
while(nread == granularity);
|
while(nread == granularity);
|
||||||
/* File to `C` string. */
|
/* File to `C` string. */
|
||||||
if(!(cursor = char_array_new(c))) goto catch;
|
if(!(cursor = char_array_new(text))) goto catch;
|
||||||
*cursor = '\0';
|
*cursor = '\0';
|
||||||
/* Binary files with embedded '\0' are not allowed. */
|
/* Binary files with embedded '\0' are not allowed; check just this read. */
|
||||||
if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; }
|
if(strchr(text->data + start, '\0') != cursor)
|
||||||
{ success = 1; goto finally; }
|
{ errno = EILSEQ; goto catch; }
|
||||||
|
goto finally;
|
||||||
catch:
|
catch:
|
||||||
if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */
|
if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */
|
||||||
|
success = 0;
|
||||||
finally:
|
finally:
|
||||||
if(fp) fclose(fp);
|
if(fp) fclose(fp);
|
||||||
return success;
|
return success ? text->data + start : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -245,12 +249,12 @@ static uint32_t lowbias32_r(uint32_t x) {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Two hash-tables use the same structure. */
|
||||||
union kjvcite {
|
union kjvcite {
|
||||||
/* Overkill, but no initializing unused bits, 12 + 13 + 7 = 32. */
|
/* Overkill, but no initializing unused bits, 12 + 13 + 7 = 32. */
|
||||||
struct { unsigned verse : 12, chapter : 13, book : 7; };
|
struct { unsigned verse : 12, chapter : 13, book : 7; };
|
||||||
uint32_t u32;
|
uint32_t u32;
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32_t kjv_hash(const union kjvcite x) { return lowbias32(x.u32); }
|
static uint32_t kjv_hash(const union kjvcite x) { return lowbias32(x.u32); }
|
||||||
static union kjvcite kjv_unhash(const uint32_t x) {
|
static union kjvcite kjv_unhash(const uint32_t x) {
|
||||||
union kjvcite k;
|
union kjvcite k;
|
||||||
|
@ -261,25 +265,25 @@ static void kjv_to_string(const union kjvcite x, char (*const a)[12])
|
||||||
{ sprintf(*a, "%.4s%u:%u", kjv_book_string[x.book],
|
{ sprintf(*a, "%.4s%u:%u", kjv_book_string[x.book],
|
||||||
(x.chapter + 1) % 1000, (x.verse + 1) % 1000); }
|
(x.chapter + 1) % 1000, (x.verse + 1) % 1000); }
|
||||||
|
|
||||||
static uint32_t kjvword_hash(const union kjvcite x) { return kjv_hash(x); }
|
/** Derived information on verse word count. */
|
||||||
static union kjvcite kjvword_unhash(const uint32_t x) { return kjv_unhash(x); }
|
static uint32_t verse_hash(const union kjvcite x) { return kjv_hash(x); }
|
||||||
static void kjvword_to_string(const union kjvcite x, char (*const a)[12])
|
static union kjvcite verse_unhash(const uint32_t x) { return kjv_unhash(x); }
|
||||||
|
static void verse_to_string(const union kjvcite x, char (*const a)[12])
|
||||||
{ kjv_to_string(x, a); }
|
{ kjv_to_string(x, a); }
|
||||||
|
#define TABLE_NAME verse
|
||||||
#define TABLE_NAME kjvword
|
|
||||||
#define TABLE_KEY union kjvcite
|
#define TABLE_KEY union kjvcite
|
||||||
#define TABLE_UINT uint32_t
|
#define TABLE_UINT uint32_t
|
||||||
#define TABLE_VALUE unsigned
|
#define TABLE_VALUE unsigned
|
||||||
#define TABLE_DEFAULT 0
|
|
||||||
#define TABLE_INVERSE
|
#define TABLE_INVERSE
|
||||||
|
#define TABLE_DEFAULT 0
|
||||||
#define TABLE_TO_STRING
|
#define TABLE_TO_STRING
|
||||||
#include "../src/table.h"
|
#include "../src/table.h"
|
||||||
|
|
||||||
|
/* A set of verses. */
|
||||||
static uint32_t kjvset_hash(const union kjvcite x) { return kjv_hash(x); }
|
static uint32_t kjvset_hash(const union kjvcite x) { return kjv_hash(x); }
|
||||||
static union kjvcite kjvset_unhash(const uint32_t x) { return kjv_unhash(x); }
|
static union kjvcite kjvset_unhash(const uint32_t x) { return kjv_unhash(x); }
|
||||||
static void kjvset_to_string(const union kjvcite x, char (*const a)[12])
|
static void kjvset_to_string(const union kjvcite x, char (*const a)[12])
|
||||||
{ kjv_to_string(x, a); }
|
{ kjv_to_string(x, a); }
|
||||||
|
|
||||||
#define TABLE_NAME kjvset
|
#define TABLE_NAME kjvset
|
||||||
#define TABLE_KEY union kjvcite
|
#define TABLE_KEY union kjvcite
|
||||||
#define TABLE_UINT uint32_t
|
#define TABLE_UINT uint32_t
|
||||||
|
@ -288,76 +292,78 @@ static void kjvset_to_string(const union kjvcite x, char (*const a)[12])
|
||||||
#include "../src/table.h"
|
#include "../src/table.h"
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
const char *const dir_name = "KJV";
|
const char *const dir_kjv = "KJV";
|
||||||
struct char_array kjv[KJV_BOOK_SIZE] = { 0 };
|
struct {
|
||||||
struct kjvword_table words = { 0 };
|
struct char_array backing;
|
||||||
int success = EXIT_SUCCESS;
|
struct verse_table verses;
|
||||||
|
size_t words;
|
||||||
|
} kjv = { 0 };
|
||||||
DIR *dir = 0;
|
DIR *dir = 0;
|
||||||
struct dirent *de = 0;
|
struct dirent *de = 0;
|
||||||
unsigned i;
|
struct { size_t offset; int is; } build[KJV_BOOK_SIZE] = { 0 };
|
||||||
size_t cum_words = 0;
|
enum kjv_book b = 0;
|
||||||
|
int success = EXIT_SUCCESS, attempted_closedir = 0;
|
||||||
errno = 0;
|
errno = 0;
|
||||||
|
|
||||||
/* Read in the kjv from all files. This is overkill, we don't need to keep
|
/* For all files in directory KJV with <#>*.txt, read into backing. */
|
||||||
all the data, just count. Maybe we'll do something else later? */
|
if(chdir(dir_kjv) == -1 || !(dir = opendir("."))) goto catch;
|
||||||
if(chdir(dir_name) == -1 || !(dir = opendir("."))) goto catch;
|
while((de = readdir(dir))) {
|
||||||
while((de = readdir(dir))) { /* For all files in directory. */
|
|
||||||
unsigned ordinal;
|
unsigned ordinal;
|
||||||
enum kjv_book b;
|
char *unstable_book;
|
||||||
if(!kjv_filename(de->d_name, &ordinal)) /* Extract ordinal. */
|
if(!kjv_filename(de->d_name, &ordinal)) continue; /* Extract no. */
|
||||||
{ /*fprintf(stderr, "Ignored <%s>.\n", de->d_name);*/ continue; }
|
|
||||||
/*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/
|
/*fprintf(stderr, "<%s> ordinal: %u\n", de->d_name, ordinal);*/
|
||||||
if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
|
if(ordinal < 1 || ordinal > KJV_BOOK_SIZE)
|
||||||
{ errno = ERANGE; goto catch; } /* Not in range. */
|
{ errno = ERANGE; goto catch; } /* Not in range. */
|
||||||
if(kjv[b = ordinal - 1].data) /* Convert to zero-based. */
|
if(build[b = ordinal - 1].is) /* Convert to zero-based. */
|
||||||
{ errno = EDOM; goto catch; } /* Duplicate. */
|
{ errno = EDOM; goto catch; } /* Duplicate. */
|
||||||
if(!append_file(kjv + b, de->d_name)) goto catch;
|
if(!(unstable_book = append_file(&kjv.backing, de->d_name))) goto catch;
|
||||||
|
build[b].is = 1;
|
||||||
|
build[b].offset = (size_t)(unstable_book - kjv.backing.data);
|
||||||
}
|
}
|
||||||
closedir(dir), de = 0, dir = 0;
|
if(attempted_closedir = 1, closedir(dir) == -1) goto catch; dir = 0;
|
||||||
|
|
||||||
/* Parse number of words in each verse. */
|
/* Now backing is stable; count all the words for each verse. */
|
||||||
for(i = 0; i < KJV_BOOK_SIZE; i++) {
|
for(b = 0; b < KJV_BOOK_SIZE; b++) {
|
||||||
struct lex x = lex(kjv[i].data);
|
struct lex x;
|
||||||
if(!x.cursor) { fprintf(stderr, "Missing book [%u]%s.\n",
|
if(!build[b].is) { fprintf(stderr, "Missing book [%u]%s.\n",
|
||||||
i + 1, kjv_book_string[i]); errno = EDOM; goto catch; }
|
b + 1, kjv_book_string[b]); errno = EDOM; goto catch; }
|
||||||
/*printf("[%u]%s: cumulative %zu.\n",
|
x = lex(kjv.backing.data + build[b].offset);
|
||||||
i + 1, kjv_book_string[i], cum_words);*/
|
|
||||||
while(lex_next_verse(&x)) {
|
while(lex_next_verse(&x)) {
|
||||||
const union kjvcite c
|
const union kjvcite cite
|
||||||
= { .verse = x.verse, .chapter = x.chapter, .book = i };
|
= { .book = b, .chapter = x.chapter, .verse = x.verse };
|
||||||
unsigned *w;
|
unsigned *words;
|
||||||
switch(kjvword_table_assign(&words, c, &w)) {
|
switch(verse_table_assign(&kjv.verses, cite, &words)) {
|
||||||
case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n",
|
case TABLE_PRESENT: fprintf(stderr, "[%u]%s %u:%u duplicated.\n",
|
||||||
i + 1, kjv_book_string[i], x.chapter, x.verse); errno = EDOM;
|
b + 1, kjv_book_string[b], x.chapter, x.verse); errno = EDOM;
|
||||||
case TABLE_ERROR: goto catch;
|
case TABLE_ERROR: goto catch;
|
||||||
case TABLE_ABSENT: *w = x.words; break;
|
case TABLE_ABSENT: break;
|
||||||
}
|
}
|
||||||
cum_words += x.words;
|
*words = x.words, kjv.words += x.words;
|
||||||
}
|
}
|
||||||
if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n",
|
if(x.error) { fprintf(stderr, "[%u]%s on line %zu\n",
|
||||||
i + 1, kjv_book_string[i], x.line); goto catch; }
|
b + 1, kjv_book_string[b], x.line); goto catch; }
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("words: %s\n", kjvword_table_to_string(&words));
|
printf("words: %s\n", verse_table_to_string(&kjv.verses));
|
||||||
printf("kjv: %zu total words\n", cum_words);
|
printf("kjv: %zu total words\n", kjv.words);
|
||||||
{
|
{
|
||||||
union kjvcite c;
|
union kjvcite c;
|
||||||
struct kjvword_table_iterator it = kjvword_table_begin(&words);
|
struct verse_table_iterator it = verse_table_begin(&kjv.verses);
|
||||||
unsigned *w;
|
unsigned *w;
|
||||||
while(kjvword_table_next(&it, &c, &w))
|
while(verse_table_next(&it, &c, &w))
|
||||||
printf("%s %u:%u -> %u\n",
|
printf("%s %u:%u -> %u\n",
|
||||||
kjv_book_string[c.book], c.chapter, c.verse, *w);
|
kjv_book_string[c.book], c.chapter, c.verse, *w);
|
||||||
c = (union kjvcite){ .book = Genesis, .chapter = 1, .verse = 1 };
|
c = (union kjvcite){ .book = Genesis, .chapter = 1, .verse = 1 };
|
||||||
printf("1:1:1 -> %u\n", kjvword_table_get(&words, c));
|
printf("1:1:1 -> %u\n", verse_table_get(&kjv.verses, c));
|
||||||
}
|
}
|
||||||
goto finally;
|
goto finally;
|
||||||
catch:
|
catch:
|
||||||
success = EXIT_FAILURE;
|
success = EXIT_FAILURE;
|
||||||
perror(de ? de->d_name : dir_name);
|
if(de) fprintf(stderr, "While reading %s.\n", de->d_name);
|
||||||
if(dir && closedir(dir)) perror(dir_name);
|
perror(de ? de->d_name : dir_kjv);
|
||||||
|
if(dir && !attempted_closedir && closedir(dir) == -1) perror(dir_kjv);
|
||||||
finally:
|
finally:
|
||||||
/*kjvset_table_();*/
|
verse_table_(&kjv.verses);
|
||||||
kjvword_table_(&words);
|
char_array_(&kjv.backing);
|
||||||
for(i = 0; i < KJV_BOOK_SIZE; i++) char_array_(kjv + i);
|
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue