transferred from offsets to pointers

This commit is contained in:
Neil 2022-12-28 23:54:59 -08:00
parent e786b40167
commit 940d305258
11 changed files with 727 additions and 366 deletions

View File

@ -29,16 +29,16 @@ else
CF += -g
endif
projects := bin/test-text bin/test-kjv bin/test-journal bin/interpret
projects := bin/test-text bin/test-kjv bin/test-journal bin/kjv
#docs := $(patsubst test/test_%.c, doc/%.md, $(wildcard test/test_*.c))
default: $(projects)
# success
bin/test-text: build/test_text.o build/text.o
bin/test-kjv: build/test_kjv.o build/text.o build/kjv.o
bin/test-journal: build/test_journal.o build/text.o build/journal.o
bin/interpret: build/interpret.o build/scan.o build/journal.o build/kjv.o build/text.o
bin/test-text: build/text.o build/test_text.o
bin/test-kjv: build/text.o build/kjv.o build/test_kjv.o
bin/test-journal: build/text.o build/journal.o build/test_journal.o
bin/kjv: build/text.o build/journal.o build/kjv.o build/scan_kjv.o
bin/%:
@echo "\033[1;36mlinking $@\033[0m"
@ -68,7 +68,7 @@ build/%.c: src/%.re.c
# # https://github.com/neil-edelman/cdoc documentation
# -cdoc -o $@ $<
.SECONDARY: build/kjv.c build/journal.c build/scan.c
.SECONDARY: build/kjv.c build/journal.c build/scan_kjv.c
.PHONY: clean release test
test: $(projects)

16
src/helper.h Normal file
View File

@ -0,0 +1,16 @@
#include <errno.h>
#include <stdint.h>
/** Parse unsigned; [`s`,`e`) => `n`.
 Reads the decimal digits in the half-open range [`s`, `e`) as an unsigned
 32-bit number. An empty range yields zero. A number that starts with the digit
 zero is refused with `ERANGE`, exactly as the previous growth test did; the
 callers visible in this project pass matches of a re2c `natural` rule.
 @param[s] First character.
 @param[e] One past the last character; no terminating null is needed.
 @param[n] Receives the value on success; it is not written on failure.
 @return 1 on success. 0 on failure, with `errno` set to `ERANGE` when the
 value does not fit in 32 bits (or has a leading zero), or to `EILSEQ` when a
 character is not a decimal digit. */
static int helper_natural(const char *s, const char *const e, uint32_t *const n)
{
	uint32_t accum = 0;
	for( ; s < e; s++) {
		uint32_t digit;
		if(*s < '0' || *s > '9') return errno = EILSEQ, 0;
		digit = (uint32_t)(*s - '0');
		/* Kept from the old `accum >= next` test: zero can not grow. */
		if(!accum && !digit) return errno = ERANGE, 0;
		/* Check before multiplying: a product that wrapped around can still
		 be greater than `accum`, so comparing afterwards misses overflows
		 such as "9999999999". */
		if(accum > (UINT32_MAX - digit) / 10) return errno = ERANGE, 0;
		accum = accum * 10 + digit;
	}
	*n = accum;
	return 1;
}

View File

@ -1,4 +1,6 @@
#ifndef OMIT_BASE /* <!-- base */
#include <stddef.h>
union load { const char *text; size_t offset; };
#include <stdint.h> /* C99 */
/** Assumes: reverse ordering of byte-fields; unsigned is defined; C11 and GNU
anonymous unions. */
@ -6,6 +8,7 @@ union date32 {
struct { uint32_t day : 5, month : 4, year : 23; }; /* C11, reverse */
uint32_t u32;
};
void date32_to_string(const union date32 d, char (*const a)[12]);
#else /* base --><!-- !base */
#undef OMIT_BASE
#endif /* !base --> */
@ -15,6 +18,11 @@ union date32 {
/* Declarations of the day-tree types so that they can be embedded by value in
 other structures.
 NOTE(review): presumably these have to stay identical to the definitions
 produced where the tree is instantiated -- confirm before changing a field. */
struct tree_day_node;
/* A (sub-)tree: a node pointer and a height. */
struct tree_day_tree { struct tree_day_node *node; unsigned height; };
struct day_tree { struct tree_day_tree root; };
/* A position: a node, a height and an index `idx`. */
struct tree_day_ref { struct tree_day_node *node; unsigned height, idx; };
/* Iteration state: the tree being walked, the current position, and a flag
 `seen`. */
struct tree_day_iterator {
struct tree_day_tree *root; struct tree_day_ref ref; int seen;
};
/* Public wrapper; the single member `_` holds the iterator state. */
struct day_tree_iterator { struct tree_day_iterator _; };
#else /* page --><!-- !page */
#undef OMIT_DAY
#endif /* !page --> */
@ -23,10 +31,15 @@ struct day_tree { struct tree_day_tree root; };
#ifndef OMIT_PROTO /* <!-- proto */
#include "text.h"
/** A journal: a tree of days and the text that backs it. */
struct journal { struct day_tree days; struct text backing; };
/** Iterator over a journal; wraps a `day_tree_iterator` in the member `_`. */
struct journal_iterator { struct day_tree_iterator _; };
/** @return A new journal. */
struct journal journal(void);
/** NOTE(review): presumably releases a journal made by <fn:journal>; the
 definition is not next to this declaration -- confirm. */
void journal_(struct journal *);
int journal_is_valid(const struct journal *);
/** @return The journal's day tree as a string. */
const char *journal_to_string(const struct journal *);
/** @return An iterator from `day_tree_begin` on the days of `j`. */
struct journal_iterator journal_begin(struct journal *const j);
/** @return An iterator from `day_tree_begin_at` on the days, given the date. */
struct journal_iterator journal_begin_at(struct journal *, const union date32);
/* Only pointers to `union load` are needed by the next prototype. */
union load;
/** Forwards to `day_tree_next`, passing on the date and value pointers.
 @return What `day_tree_next` returns. */
int journal_next(struct journal_iterator *, union date32 *, union load **);
#else /* proto --><!-- !proto */
#undef OMIT_PROTO
#endif /* !proto --> */

View File

@ -12,8 +12,7 @@
#include <dirent.h> /* opendir readdir closedir */
union load { const char *text; size_t offset; };
static void date32_to_string(const union date32 d, char (*const a)[12]) {
/** Formats `d` into `a` as year-month-day, with the month and day padded to
 two digits. Each field is reduced (year modulo 10000, month and day modulo
 100) so that the text and its terminating null always fit in the 12 bytes.
 @param[d] The date to print.
 @param[a] Receives the string. */
void date32_to_string(const union date32 d, char (*const a)[12]) {
	const uint32_t year = d.year % 10000;
	const uint32_t month = d.month % 100;
	const uint32_t day = d.day % 100;
	sprintf(*a, "%" PRIu32 "-%2.2" PRIu32 "-%2.2" PRIu32, year, month, day);
}
@ -101,39 +100,6 @@ static unsigned looks_like_day(const char *const a) {
*/
}
#if 0
/** Is `y` a leap-year? */
static int leap(int y) {
assert(y >= 1582);
if(!(y % 400)) return 1;
if(!(y % 100)) return 0;
if(!(y % 4)) return 1;
return 0;
}
/** @return Pack into `date32` or return zero. */
static union date32 date_to_32(const int y, const int m, const int d) {
union date32 d32 = { 0 };
/* Leap year calculations only work at y>=1 and Gregorian Calendar and max
23 bits. */
if(y < 1582 || y > 8388607 || m < 1 || m > 12 || d < 1 || d > 31) goto no;
switch(m) {
case 1: case 3: case 5: case 7: case 8: case 10: case 12: break;
case 4: case 6: case 9: case 11: if(d > 30) goto no; break;
case 2: if(d > 28 + leap(y)) goto no; break;
default: assert(0); break;
}
d32.year = (unsigned)y, d32.month = (unsigned)m, d32.day = (unsigned)d;
no:
return d32;
}
/** Tomohiko Sakamoto comp.lang.c 1993-04-10. */
static unsigned weekday(union date32 d) {
d.year -= d.month < 3;
return (d.year + d.year / 4 - d.year / 100 + d.year / 400
+ "-bed=pen+mad."[d.month] + d.day) % 7;
}
#endif
#define OMIT_BASE
#define OMIT_DAY
@ -157,6 +123,8 @@ struct journal journal(void) {
struct int_array years = int_array(), months = int_array(),
days = int_array();
int *y = 0, *y_end, *m = 0, *m_end, *d = 0, *d_end;
struct day_tree_iterator it;
union load *v;
/* Get the years list as directories matching a year. */
if(chdir(dir_journal) == -1 || !(dir = opendir("."))) goto catch;
@ -236,8 +204,13 @@ struct journal journal(void) {
if(chdir("..") == -1) goto catch;
/*if(*y == 1993) break;*/
}
day_tree_bulk_finish(&j.days);
if(chdir("..") == -1) goto catch;
if(chdir("..") == -1 || !day_tree_bulk_finish(&j.days)) goto catch;
/* Structure is now stable. */
it = day_tree_begin(&j.days); while(day_tree_next(&it, 0, &v)) {
/*printf("%zu\n", v->offset);*/
v->text = j.backing.a.data + v->offset;
/*printf("%.60s\n", v->text);*/
}
/*fprintf(stderr, "Journal has entries: %s\n",
day_tree_to_string(&j.days));*/
goto finally;
@ -262,3 +235,22 @@ int journal_is_valid(const struct journal *const j) {
/** @return `j` as a string. */
const char *journal_to_string(const struct journal *const j)
{ return day_tree_to_string(&j->days); }
struct journal_iterator journal_begin(struct journal *const j) {
struct journal_iterator it;
it._ = day_tree_begin(&j->days);
return it;
}
struct journal_iterator journal_begin_at(struct journal *const j,
const union date32 x) {
struct journal_iterator it;
it._ = day_tree_begin_at(&j->days, x);
return it;
}
/** Advances `it` by forwarding to `day_tree_next` on the wrapped iterator.
 @param[k] Passed on as the key (date) output.
 @param[v] Passed on as the value output.
 @return Whatever `day_tree_next` returns. */
int journal_next(struct journal_iterator *const it,
	union date32 *const k, union load **v) {
	struct day_tree_iterator *const inner = &it->_;
	return day_tree_next(inner, k, v);
}

View File

@ -80,6 +80,7 @@ union kjvcite {
struct { uint32_t verse : 12, chapter : 13, book : 7; }; /* C11, reverse */
uint32_t u32;
};
void kjvcite_to_string(const union kjvcite, char (*)[12]);
#else /* base --><!-- !base */
#undef OMIT_BASE
#endif /* !base --> */

View File

@ -11,16 +11,21 @@
#define OMIT_VERSES
#define OMIT_PROTO
#include "../src/kjv.h" /* Just the base data. */
#include "../src/helper.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <dirent.h> /* opendir readdir closedir */
#include <unistd.h> /* chdir (POSIX) (because I'm lazy) */
/* #include <cmph.h> No; overkill. */
/** Formats the citation `x` into `a` as at most four characters of the book
 name, then chapter, a colon and verse. Chapter and verse are reduced modulo
 1000 so the result fits in the 12 bytes. A book number that is not less than
 `KJV_BOOK_SIZE` is printed using the entry at index `KJV_BOOK_SIZE`.
 NOTE(review): that assumes `kjv_book_string` has an entry at that index --
 confirm against its definition. */
void kjvcite_to_string(const union kjvcite x, char (*const a)[12])
{
	const unsigned book = x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE;
	sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32,
		kjv_book_string[book], x.chapter % 1000, x.verse % 1000);
}
/* Reversible hash map. */
/** <https://nullprogram.com/blog/2018/07/31/>
<https://github.com/skeeto/hash-prospector> on `x`. */
@ -46,8 +51,7 @@ static uint32_t kjvset_hash(const union kjvcite x) { return lowbias32(x.u32); }
static union kjvcite kjvset_unhash(const uint32_t x)
{ union kjvcite k; k.u32 = lowbias32_r(x); return k; }
static void kjvset_to_string(const union kjvcite x, char (*const a)[12])
{ sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, kjv_book_string[x.book],
x.chapter % 1000, x.verse % 1000); }
{ kjvcite_to_string(x, a); }
#define TABLE_NAME kjvset
#define TABLE_KEY union kjvcite
#define TABLE_UINT uint32_t
@ -70,20 +74,6 @@ static void verse_to_string(const union kjvcite x, const unsigned count,
#include "../src/table.h"
/** Helper to parse unsigned; [`s`,`e`) => `n`. */
static int parse_natural(const char *s, const char *const e, unsigned *const n) {
unsigned accum = 0;
while(s < e) {
unsigned next = accum * 10 + (unsigned)(*s - '0');
if(accum >= next) return errno = ERANGE, 0;
accum = next;
s++;
}
*n = accum;
return 1;
}
/* Parse filename of books. */
/*!re2c /**/
re2c:yyfill:enable = 0;
@ -101,7 +91,7 @@ static int looks_like_book_fn(const char *fn, unsigned *const book_no) {
*
{ return 0; }
@s0 natural @s1 [^.\x00]* ".txt" "\x00"
{ return parse_natural(s0, s1, book_no); }
{ return helper_natural(s0, s1, book_no); }
*/
}
@ -111,7 +101,7 @@ struct lex {
size_t line;
const char *cursor;
int error;
unsigned chapter, verse, words;
uint32_t chapter, verse, words;
};
static struct lex lex(const char *cursor) {
struct lex lex;
@ -140,8 +130,8 @@ scan:
<line> [^[\]\n\x00]* "\n" { lex->line++; goto scan; }
<line> "\x00" { return 0; }
<line> "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse {
if(!parse_natural(s0, s1, &lex->chapter)
|| !parse_natural(t0, t1, &lex->verse))
if(!helper_natural(s0, s1, &lex->chapter)
|| !helper_natural(t0, t1, &lex->verse))
return errno = EILSEQ, lex->error = 1, 0;
lex->words = 0;
/*printf("%u:%u", lex->chapter, lex->verse);*/

550
src/not-used.c Normal file
View File

@ -0,0 +1,550 @@
/** Is `y` a leap-year? Divisible by four, except for centuries that are not
 divisible by 400. Asserts that `y` is 1582 or later.
 @return 1 for a leap year, otherwise 0. */
static int leap(int y) {
	assert(y >= 1582);
	return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
}
/** Convert or narrower type or return zero. */
static union date32 date_to_32(const int y, const int m, const int d) {
union date32 d32 = { 0 };
/* Leap year calculations only work at y>=1 and Gregorian Calendar and max
23 bits. */
if(y < 1582 || y > 8388607 || m < 1 || m > 12 || d < 1 || d > 31) goto no;
switch(m) {
case 1: case 3: case 5: case 7: case 8: case 10: case 12: break;
case 4: case 6: case 9: case 11: if(d > 30) goto no; break;
case 2: if(d > 28 + leap(y)) goto no; break;
default: assert(0); break;
}
d32.year = (unsigned)y, d32.month = (unsigned)m, d32.day = (unsigned)d;
no:
return d32;
}
/** Tomohiko Sakamoto comp.lang.c 1993-04-10.
 Day of the week of `d`, as a number in [0, 6]. `d` is a copy, so adjusting
 the year here does not affect the caller.
 NOTE(review): in Sakamoto's method zero is Sunday; this also assumes the
 month is in 1 to 12 -- confirm that callers agree. */
static unsigned weekday(union date32 d) {
/* January and February are counted as part of the previous year. */
d.year -= d.month < 3;
/* The string is a table of per-month offsets indexed by month; only each
 character's value modulo 7 matters, and index 0 is a placeholder. */
return (d.year + d.year / 4 - d.year / 100 + d.year / 400
+ "-bed=pen+mad."[d.month] + d.day) % 7;
}
/*
### plot with steps
reset session
$Data <<EOD
1,1,0
1,2,0
1,3,0
1,4,2
1,5,1
1,6,3
1,7,3
1,8,1
1,9,3
1,10,8
1,11,1
1,12,0
1,13,3
EOD
set title "Cumulative count" font ",16"
set xlabel "episode"
set ylabel "cumulative count"
set xtics 1
set key bottom right
set grid
unset border
set datafile separator comma
plot $Data u 2:($3) smooth cumulative with steps lw 2 lc "red" ti "cumulative count"
### end of code
*/
/** Goes through every page of `journal`, and every lexeme in each page's
 `meaning`, printing the KJV references that it finds and counting the
 `KJV_TEXT` lexemes. `state` checks that the lexemes come in the order book,
 chapter-verse, then words. The date and book go to `stderr`; everything else,
 including the final count, goes to `stdout`.
 @return 1 on success. 0 if a lexeme arrives in a state that does not allow
 it; then a message is printed on `stderr` and `errno` is set to `EILSEQ`. */
static int bible_graph(/*const*/ struct page_tree *const journal) {
/* NOTE(review): `NEXT` is tested below but never assigned in this function. */
enum { CHILL, BOOK, CHAPTER, WORD, NEXT } state = CHILL;
struct page_tree_entry entry = { 0, 0 };
struct lex *lex = 0;
size_t count = 0;
/* The iteration stops at the first entry with a null key. */
for(struct page_tree_iterator p_it = page_tree_begin(journal);
(entry = page_tree_next(&p_it)).key; ) {
struct page *const page = entry.value;
for(struct lex_array_iterator l_it = lex_array_iterator(&page->meaning);
(lex = lex_array_next(&l_it)); ) {
switch(lex->symbol) {
case KJV_BOOK:
/* A book starts a reference, possibly directly after another's words. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n");
fprintf(stderr, "%d-%.2d-%.2d: \"%.*s\", ",
entry.key->year, entry.key->month, entry.key->day,
(int)(lex->s1 - lex->s0), lex->s0);
state = BOOK;
break;
case KJV_CHAPTER_VERSE:
if(state != BOOK) goto catch;
printf("\"%.*s\", \"", (int)(lex->s1 - lex->s0), lex->s0);
state = CHAPTER;
break;
case KJV_NEXT:
/* Closes the quote; the state is left as `WORD`. */
if(state != WORD) goto catch;
printf("\"\n");
break;
case KJV_TEXT:
if(state != WORD && state != CHAPTER && state != NEXT)
goto catch;
/* Words after the first are prefixed by "*"; a negative length is printed
 with a precision of 10 instead. */
printf("%s%.*s<%d>", state == WORD ? "*" : "",
(int)(lex->s1 - lex->s0 < 0 ? 10 : lex->s1 - lex->s0),
lex->s0, (int)(lex->s1 - lex->s0));
count++;
state = WORD;
break;
default:
/* Any other symbol ends a reference that was in its words. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\"\n"), state = CHILL;
break;
}
}
/* A page must not end part-way through a reference. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n"), state = CHILL;
}
printf("Count: %lu.\n", (unsigned long)count);
return 1;
catch:
fprintf(stderr, "Bible error.\n");
if(entry.key) {
fprintf(stderr, "On date: %d-%.2d-%.2d.\n",
entry.key->year, entry.key->month, entry.key->day);
if(lex) fprintf(stderr, "At line %lu.\n", (unsigned long)lex->line);
}
errno = EILSEQ;
return 0;
}
/* ANSI terminal escape sequences that select a foreground colour. The `C_`
 names use attribute 0 and the `CB_` names attribute 1; `C_RESET` restores the
 default. They are string literals, so they concatenate with an adjacent
 format string. */
#define C_BLACK "\033[0;30m"
#define C_RED "\033[0;31m"
#define C_GREEN "\033[0;32m"
#define C_YELLOW "\033[0;33m"
#define C_BLUE "\033[0;34m"
#define C_PURPLE "\033[0;35m"
#define C_CYAN "\033[0;36m"
#define C_WHITE "\033[0;37m"
#define CB_BLACK "\033[1;30m"
#define CB_RED "\033[1;31m"
#define CB_GREEN "\033[1;32m"
#define CB_YELLOW "\033[1;33m"
#define CB_BLUE "\033[1;34m"
#define CB_PURPLE "\033[1;35m"
#define CB_CYAN "\033[1;36m"
#define CB_WHITE "\033[1;37m"
#define C_RESET "\033[0m"
/** Reads a journal laid out as <year>/<month>/<day>.txt below the directory
 `argv[1]`, lexes every day into a page of a `page_tree`, echoes the lexemes,
 and then runs `bible_graph` on the result. The process changes its working
 directory as it descends and comes back up with "..".
 @return `EXIT_SUCCESS`, or `EXIT_FAILURE` after reporting with `perror` and,
 if there is one, the text in `intent`. */
int main(int argc, char **argv) {
int success = EXIT_SUCCESS;
char *intent = 0;
/* For reading in files, overwritten. */
DIR *dir = 0;
struct dirent *de;
struct int_array years = int_array(), months = int_array(),
days = int_array();
int *y, *y_end;
struct page_tree journal = page_tree();
errno = 0;
if(argc != 2) { intent = "needs journal location, which should"
" contain <year>/<month>/<day>.txt"; goto catch; }
/* Get the years list as directories matching a year. */
if(chdir(argv[1]) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int year, *p;
if(!lex_looks_like_year(de->d_name, &year)) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&years))) goto catch;
*p = year;
}
closedir(dir), dir = 0;
/* Sort the years for sensible ordering of parsing. */
qsort(years.data, years.size, sizeof *years.data, &void_int_cmp);
fprintf(stderr, "Years in <<%s>>: %s.\n",
argv[1], int_array_to_string(&years));
/* Go through each year. */
for(y = years.data, y_end = y + years.size; y < y_end; y++) {
char fn[64];
int *m, *m_end;
sprintf(fn, "%d", *y);
/* Get the months as directories. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int month, *p;
if(!(month = lex_looks_like_month(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&months))) goto catch;
*p = month;
}
closedir(dir), dir = 0;
qsort(months.data, months.size, sizeof *months.data, &void_int_cmp);
fprintf(stderr, "Months in <<%s>>: %s.)\n",
fn, int_array_to_string(&months));
/* Go through each month. */
for(m = months.data, m_end = m + months.size; m < m_end; m++) {
int *d, *d_end;
sprintf(fn, "%.2d", *m);
/* Get the days as files. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int day, *p;
/* fixme: Have yyyy-mm-dd to figure out how many days. */
if(!(day = lex_looks_like_day(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
/* Days are files, so here directories are the ones skipped. */
if(S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&days))) goto catch;
*p = day;
}
closedir(dir), dir = 0;
qsort(days.data, days.size, sizeof *days.data, &void_int_cmp);
fprintf(stderr, "Days in <<%s>>: %s.\n",
fn, int_array_to_string(&days));
/* Go through each day: add a page to the tree, load the file, lex it. */
for(d = days.data, d_end = d + days.size; d < d_end; d++) {
struct lex *lex = 0;
struct page *page = 0;
union date32 d32;
/* `date_to_32` gives a zero date, hence a zero year, when it is invalid. */
if(!(d32 = date_to_32(*y, *m, *d)).year) { errno = EILSEQ;
intent = "date parse error"; goto syntax; }
sprintf(fn, "%.2d.txt", *d);
if(page_tree_bulk_add(&journal, d32, &page) != TREE_UNIQUE) {
if(!errno) intent = "not unique", errno = EDOM;
goto syntax;
}
page->entry = char_array();
page->meaning = lex_array();
if(!append_file(&page->entry, fn)) goto syntax;
int first = 1;
/* Lex until `lex_next` returns zero; that is success only on `END`. */
for(lex_reset(page->entry.data); ; ) {
if(!(lex = lex_array_new(&page->meaning))) goto syntax;
if(!lex_next(lex)) {
if(lex->symbol != END) { errno = EILSEQ; goto syntax; }
break; /* Terminated successfully. */
}
switch(lex->symbol) {
case TEXT: printf("%s%.*s",
first ? "" : " ", (int)(lex->s1 - lex->s0), lex->s0);
first = 0; break;
case PARAGRAPH: printf("\n" C_RESET); break;
case KJV_BOOK: printf(C_YELLOW "%.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_CHAPTER_VERSE: printf(" ch. %.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_TEXT: printf("%.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_NEXT: printf("(next)\n"); break;
default:
/* Other symbols are reported on `stderr` with their line and name. */
fprintf(stderr, "%lu: %s",
(unsigned long)lex->line, lex_symbols[lex->symbol]);
if(lex->s0 && lex->s1) {
/* The length is printed through an `int` precision, so it must fit. */
if(lex->s0 + INT_MAX < lex->s1)
intent = "line too long", errno = EILSEQ;
else
fprintf(stderr, " <<%.*s>>",
(int)(lex->s1 - lex->s0), lex->s0);
}
fprintf(stderr, ".\n");
break;
}
}
continue;
/* Only reached by `goto`: print what is known about the day, then fail. */
syntax:
fprintf(stderr, "On date: %d-%.2d-%.2d.\n", *y, *m, *d);
if(!page) goto catch;
if(!lex) { fprintf(stderr, "While parsing <<<\n%s>>>.\n",
page->entry.data); goto catch; }
/* Dump every lexeme of the page; the loop ends when `lex_array_next` gives
 null. */
for(struct lex_array_iterator it
= lex_array_iterator(&page->meaning);
lex = lex_array_next(&it); ) {
fprintf(stderr, "%lu: %s",
(unsigned long)lex->line, lex_symbols[lex->symbol]);
if(lex->s0 && lex->s1) {
if(lex->s0 + INT_MAX < lex->s1)
intent = "line too long", errno = EILSEQ;
else
fprintf(stderr, " <<%.*s>>",
(int)(lex->s1 - lex->s0), lex->s0);
}
fprintf(stderr, ".\n");
}
goto catch;
}
int_array_clear(&days);
if(chdir("..") == -1) goto catch;
}
int_array_clear(&months);
if(chdir("..") == -1) goto catch;
/* fixme: Expand, contact is the next thing that it doesn't get. */
/* Stops after the year 1993, so later years are not read. */
if(*y == 1993/*1996*/) break;
}
page_tree_bulk_finish(&journal);
/* NOTE(review): the three arrays are passed to `int_array_` here and again
 under `finally`; presumably the call is safe to repeat -- confirm. */
int_array_(&years), int_array_(&months), int_array_(&days);
fprintf(stderr, "Journal has entries: %s\n", page_tree_to_string(&journal));
/* Do something interesting? */
if(!bible_graph(&journal)) goto catch;
goto finally;
catch:
success = EXIT_FAILURE;
perror("interpret");
if(intent) fprintf(stderr, "Further explanation: %s.\n", intent);
finally:
if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir");
int_array_(&years), int_array_(&months), int_array_(&days);
/* Release what each page owns. NOTE(review): `journal` itself is not passed
 to any release function in this block, and `z` is written but not used. */
struct page_tree_entry entry;
for(struct page_tree_iterator it = page_tree_begin(&journal);
(entry = page_tree_next(&it)).key; ) {
struct page *const page = entry.value;
char z[12];
date32_to_string(*entry.key, &z);
lex_array_(&page->meaning);
char_array_(&page->entry);
}
return success;
}
/* Popular KJV 738137 words. */
struct scan scan(const char *const buffer) {
struct scan scan;
scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer;
scan.condition = yycline;
scan.line = 1;
return scan;
}
/** Scans the next lexeme from `s` into `x`. The text of the rules between the
 re2c markers is input to re2c, which generates the matching code; re2c reads
 and writes the position through `s->cursor` and `s->marker` and the current
 condition through `s->condition`.
 NOTE(review): the rules use `s->edict`, `s->is_source` and `expect_pop()`,
 jump to conditions such as `ed` and `contact`, and repeat `<edict> "source"`
 and `<edict> "ed"`; none of those are declared in the text visible with this
 function, so it presumably does not build as it stands -- confirm.
 @param[s] The scanner; must not be null. If `s->buffer` is null, nothing is
 scanned.
 @param[x] Receives the line, the symbol, and for symbols that carry text the
 span [`s0`, `s1`); must not be null.
 @return 1 when a symbol was produced. 0 otherwise, in which case a rule that
 stopped the scan has set `x->symbol` to `END`, `SYNTAX` or `ILLEGAL`. */
int scan_next(struct scan *const s, struct lex *const x) {
/*!re2c /**/
re2c:flags:tags = 1;
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
sentinel = "\x00";
newline = "\n";
unix_control = [\x01-\x08\x0a-\x1f\x7f];
ws = [ \t];
glyph = [^] \ (sentinel | unix_control | newline | ws);
keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
natural = [1-9][0-9]*;
date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9];
*/
/* Filled in from the `@s0` and `@s1` tags of the rule that matches. */
const char *s0, *s1;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(s && x);
if(!s->buffer) return 0;
/* Start from the scanner's line with no text attached. */
x->line = s->line;
x->s0 = x->s1 = 0;
/* Rules that produce no symbol jump back here to keep scanning. */
scan:
/*!re2c /**/
<*> unix_control { return x->symbol = ILLEGAL, 0; }
<*> * { return x->symbol = SYNTAX, 0; }
<*> sentinel /* New line always delimits. */
{ return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; }
<expect_line> newline => line { x->line = ++s->line; goto scan; }
/* Symbols that go at the beginning of a line. */
<line> newline { x->line = ++s->line; goto scan; }
<line> "[" :=> edict
<line> "--" :=> source
<line> "->" :=> location
<line> "!" => text { return x->symbol = COMPLETE, 1; }
<line> "^" => text { return x->symbol = CANCELLED, 1; }
<line> "#" => text { return x->symbol = HEADING, 1; }
<line> * :=> text
<text> newline => line { x->line = ++s->line; goto scan; }
<text, bible> ws+ { goto scan; }
<text> @s0 glyph+ @s1
{ x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; }
<source> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; }
<location> "" / "(" :=> map
<location> "[" ws* @s0 keyword @s1 ws* "]"
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; }
<location> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; }
<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
<edict> "source" :=> source
<edict> "ed" :=> ed
<edict> "contact" :=> contact
<edict> "glider" :=> glider
<edict> "flight" :=> flight
<edict> "bible" :=> bible
<edict> "book" :=> book
<edict> "movie" :=> movie
<edict> "tv" :=> tv
<edict> "medication" :=> medication
<edict> "idea" :=> idea
<edict> "vaccine" :=> vaccine
<edict> "in" :=> in
<edict> "" / natural :=> significant
<edict> [0-1][0-9] "-" [0-3][0-9]
", " [0-2][0-9] ":" [0-5][0-9] "] "
:=> text /* This is likely WhatsApp conversations. Ignore. */
/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */
/* How did it get into my journal? */
<edict> "source"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1, s->is_source = 1;
s->edict.size = 2;
s->edict.expect[1] = EXPECT_KEYWORD;
s->edict.expect[0] = EXPECT_END_TEXT;
return x->symbol = SOURCE, 1; }
<edict> "default"
{ if(s->is_ws_expected || !s->is_source)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1, s->is_source = 0;
return x->symbol = DEFAULT, 1; }
/* Editorializing; looking back. */
<edict> "ed"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */
s->edict.size = 1;
s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */
return x->symbol = EDITORIALIZING, 1; }
/* Score. */
<edict> "significant"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1;
s->edict.size = 3;
s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */
s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */
s->edict.expect[0] = EXPECT_DATE; /* Birthday. */
return x->symbol = SIGNIFICANT, 1; }
<edict> @s0 natural @s1
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1;
x->s0 = s0, x->s1 = s1;
return x->symbol = SIGNIFICANT_RECALL, 1; }
/* General [edict: whatever]. */
<edict> ws+ { s->is_ws_expected = 0; goto scan; }
<edict> ":"
{ if(!s->edict.size) return x->symbol = SYNTAX, 0;
s->is_ws_expected = 0, s->is_source = 0;
expect_pop(); goto scan; }
<edict_keyword> ws* @s0 keyword @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_KEYWORD, 1; }
<edict_date> ws* @s0 date @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_DATE, 1; }
<edict_natural> ws* @s0 natural @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_NATURAL, 1; }
<edict_restrict_text>
ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_RESTRICT_TEXT, 1; }
<edict_end_text>
ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws*
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_END_TEXT, 1; }
<edict, edict_end> "]" => expect_line
{ if(s->edict.size) return 0; goto scan; }
*/
}
/* X-macro: the list of every lexer symbol. `FOR_SYMBOL(X)` expands to
 `X(name, m)` for each entry, separated by commas, where `m` is either 0 or
 the address of a `..._vt` object. Defining `X` to pick one argument turns the
 same list into different tables.
 NOTE(review): the `..._vt` objects are not declared in the text visible with
 this macro -- confirm where they come from before using the second
 argument. */
#define FOR_SYMBOL(X) \
\
/* Results. */ \
X(END, 0), \
X(SYNTAX, 0), \
X(ILLEGAL, 0), \
X(NOT_FOUND, 0), \
\
X(ORDERED_LIST_ITEM, &no_vt), \
X(LIST_ITEM, 0), \
X(COMPLETE, 0), \
X(CANCELLED, 0), \
X(HEADING, 0), \
/* Text. */ \
X(PARAGRAPH, 0), \
X(TEXT, &word_vt), \
/*This is lazy.*/X(CAPTION, &word_vt), \
\
/* Edicts. */ \
X(SOURCE, &word_vt), \
X(DEFAULT, 0), \
X(SOURCE_RECALL, &word_vt), \
X(LOCATION, 0), \
X(LOCATION_SAVE, 0), \
X(LOCATION_RECALL, 0), \
X(SIGNIFICANT, 0), \
X(SIGNIFICANT_RECALL, 0), \
X(EDITORIALIZING, 0), \
\
/* Arguments. */ \
X(ARG_KEYWORD, 0), \
X(ARG_DATE, &date_vt), \
X(ARG_NATURAL, &no_vt), \
X(ARG_RESTRICT_TEXT, &word_vt), \
X(ARG_END_TEXT, &word_vt), \
\
/* Bible */ \
X(KJV_BOOK, &kjv_book_vt), \
X(KJV_CHAPTER_VERSE, &word_vt), \
X(KJV_TEXT, &word_vt), \
X(KJV_NEXT, 0)
int scan(union date32 date, const char *const buffer);
/** State of a scanner over one null-terminated buffer. */
struct scan {
/* Positions in the text. `scan_next` hands `cursor` and `marker` to re2c as
 `YYCURSOR` and `YYMARKER`. */
const char *marker, *from, *cursor, *limit, *label, *buffer;
/* The current re2c condition. */
int condition;
/* Line number; `scan_next` increments it for each newline it consumes. */
size_t line;
/* Set after an edict word and cleared by white space or a colon. */
int is_ws_expected;
};
/** One lexeme produced by the scanner. */
struct lex {
/* The line that the lexeme was found on. */
size_t line;
/* `ARG1` keeps the first argument of every `FOR_SYMBOL` entry, so the
 enumeration has one constant per symbol, in list order. */
#define ARG1(n, m) n
enum lex_symbol { FOR_SYMBOL(ARG1) } symbol;
#undef ARG1
/* The text of the lexeme is [`s0`, `s1`); both are null when the symbol
 carries no text. */
const char *s0, *s1;
};
/* `STR1` turns the first argument of every `FOR_SYMBOL` entry into a string,
 so `lex_symbols[symbol]` is the name of the `enum lex_symbol` constant with
 that value. */
#define STR1(n, m) #n
static const char *const lex_symbols[] = { FOR_SYMBOL(STR1) };
/* NOTE(review): no macro named `X` is defined in the visible text, so this
 has no effect there and `STR1` stays defined; presumably `#undef STR1` was
 meant -- confirm. */
#undef X
struct scan scan(const char *);
int scan_next(struct scan *const s, struct lex *const x);

View File

@ -1,74 +0,0 @@
#include "journal.h"
int lex_looks_like_year(const char *, int *);
int lex_looks_like_month(const char *);
int lex_looks_like_day(const char *);
#define FOR_SYMBOL(X) \
\
/* Results. */ \
X(END, 0), \
X(SYNTAX, 0), \
X(ILLEGAL, 0), \
X(NOT_FOUND, 0), \
\
X(ORDERED_LIST_ITEM, &no_vt), \
X(LIST_ITEM, 0), \
X(COMPLETE, 0), \
X(CANCELLED, 0), \
X(HEADING, 0), \
/* Text. */ \
X(PARAGRAPH, 0), \
X(TEXT, &word_vt), \
/*This is lazy.*/X(CAPTION, &word_vt), \
\
/* Edicts. */ \
X(SOURCE, &word_vt), \
X(DEFAULT, 0), \
X(SOURCE_RECALL, &word_vt), \
X(LOCATION, 0), \
X(LOCATION_SAVE, 0), \
X(LOCATION_RECALL, 0), \
X(SIGNIFICANT, 0), \
X(SIGNIFICANT_RECALL, 0), \
X(EDITORIALIZING, 0), \
\
/* Arguments. */ \
X(ARG_KEYWORD, 0), \
X(ARG_DATE, &date_vt), \
X(ARG_NATURAL, &no_vt), \
X(ARG_RESTRICT_TEXT, &word_vt), \
X(ARG_END_TEXT, &word_vt), \
\
/* Bible */ \
X(KJV_BOOK, &kjv_book_vt), \
X(KJV_CHAPTER_VERSE, &word_vt), \
X(KJV_TEXT, &word_vt), \
X(KJV_NEXT, 0)
int scan(union date32 date, const char *const buffer);
#if 0
struct scan {
const char *marker, *from, *cursor, *limit, *label, *buffer;
int condition;
size_t line;
int is_ws_expected;
};
struct lex {
size_t line;
#define ARG1(n, m) n
enum lex_symbol { FOR_SYMBOL(ARG1) } symbol;
#undef ARG1
const char *s0, *s1;
};
#define STR1(n, m) #n
static const char *const lex_symbols[] = { FOR_SYMBOL(STR1) };
#undef X
struct scan scan(const char *);
int scan_next(struct scan *const s, struct lex *const x);
#endif

View File

@ -1,228 +0,0 @@
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Lexer for journal entries.
"^[edict: expect; there; to; be; args] Something.\n"
"^[<num>] Something.\n"
"<num>+.<num>{2}(<num>)$"
"<bookref> -- \"*\n"
@std C89/90 */
#include "../src/scan.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <errno.h>
/* This defines `enum condition`. */
/*!types:re2c*/
int scan(union date32 date, const char *const buffer) {
const char *YYCURSOR = buffer;
/*!re2c /**/
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
*/
}
#if 0
struct scan scan(const char *const buffer) {
struct scan scan;
scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer;
scan.condition = yycline;
scan.line = 1;
return scan;
}
int scan_next(struct scan *const s, struct lex *const x) {
/*!re2c /**/
re2c:flags:tags = 1;
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
sentinel = "\x00";
newline = "\n";
unix_control = [\x01-\x08\x0a-\x1f\x7f];
ws = [ \t];
glyph = [^] \ (sentinel | unix_control | newline | ws);
keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
natural = [1-9][0-9]*;
date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9];
*/
const char *s0, *s1;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(s && x);
if(!s->buffer) return 0;
x->line = s->line;
x->s0 = x->s1 = 0;
scan:
/*!re2c /**/
<*> unix_control { return x->symbol = ILLEGAL, 0; }
<*> * { return x->symbol = SYNTAX, 0; }
<*> sentinel /* New line always delimits. */
{ return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; }
<expect_line> newline => line { x->line = ++s->line; goto scan; }
/* Symbols that go at the beginning of a line. */
<line> newline { x->line = ++s->line; goto scan; }
<line> "[" :=> edict
<line> "--" :=> source
<line> "->" :=> location
<line> "!" => text { return x->symbol = COMPLETE, 1; }
<line> "^" => text { return x->symbol = CANCELLED, 1; }
<line> "#" => text { return x->symbol = HEADING, 1; }
<line> * :=> text
<text> newline => line { x->line = ++s->line; goto scan; }
<text, bible> ws+ { goto scan; }
<text> @s0 glyph+ @s1
{ x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; }
bible_ref = natural ":" natural [ab]? ("-" (natural ":")? natural [ab]?)?;
glyph_minus = glyph \ ['];
<text> @s0 ("Genesis" | "Exodus" | "Leviticus" | "Numbers" | "Deuteronomy"
| "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings"
| "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job"
| "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah"
| "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel"
| "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk"
| "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark"
| "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians"
| "Galatians" | "Ephesians" | "Philippians" | "Colossians"
| "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon"
| "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude"
| "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``"
=> bible { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; }
<bible> @s0 bible_ref @s1 ws+ "--" ws+ "``"
{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_CHAPTER_VERSE, 1; }
<bible> "``" { return x->symbol = KJV_NEXT, 1; }
<bible> "''" :=> text
/* fixme: This is a hack that doesn't allow apostrophes at the end of a
word, (not sure there are any in the bible.) Is ' terminated by '';
otherwise same as glyph+ above. */
<bible> @s0 ("'"? glyph_minus+ ("'" glyph_minus+)*) @s1
{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_TEXT, 1; }
/* Multiple verses can be present, but they end in ''.
Not strictly enforced. */
<bible> newline / (newline | "``") { x->line = ++s->line; goto scan; }
<bible> newline { return x->symbol = SYNTAX, 0; }
<source> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; }
<location> "" / "(" :=> map
<location> "[" ws* @s0 keyword @s1 ws* "]"
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; }
<location> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; }
<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
<edict> "source" :=> source
<edict> "ed" :=> ed
<edict> "contact" :=> contact
<edict> "glider" :=> glider
<edict> "flight" :=> flight
<edict> "bible" :=> bible
<edict> "book" :=> book
<edict> "movie" :=> movie
<edict> "tv" :=> tv
<edict> "medication" :=> medication
<edict> "idea" :=> idea
<edict> "vaccine" :=> vaccine
<edict> "in" :=> in
<edict> "" / natural :=> significant
<edict> [0-1][0-9] "-" [0-3][0-9]
", " [0-2][0-9] ":" [0-5][0-9] "] "
:=> text /* This is likely WhatsApp conversations. Ignore. */
/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */
/* How did it get into my journal? */
<edict> "source"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1, s->is_source = 1;
s->edict.size = 2;
s->edict.expect[1] = EXPECT_KEYWORD;
s->edict.expect[0] = EXPECT_END_TEXT;
return x->symbol = SOURCE, 1; }
<edict> "default"
{ if(s->is_ws_expected || !s->is_source)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1, s->is_source = 0;
return x->symbol = DEFAULT, 1; }
/* Editorializing; looking back. */
<edict> "ed"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */
s->edict.size = 1;
s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */
return x->symbol = EDITORIALIZING, 1; }
/* Score. */
<edict> "significant"
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1;
s->edict.size = 3;
s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */
s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */
s->edict.expect[0] = EXPECT_DATE; /* Birthday. */
return x->symbol = SIGNIFICANT, 1; }
<edict> @s0 natural @s1
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
s->is_ws_expected = 1;
x->s0 = s0, x->s1 = s1;
return x->symbol = SIGNIFICANT_RECALL, 1; }
/* General [edict: whatever]. */
<edict> ws+ { s->is_ws_expected = 0; goto scan; }
<edict> ":"
{ if(!s->edict.size) return x->symbol = SYNTAX, 0;
s->is_ws_expected = 0, s->is_source = 0;
expect_pop(); goto scan; }
<edict_keyword> ws* @s0 keyword @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_KEYWORD, 1; }
<edict_date> ws* @s0 date @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_DATE, 1; }
<edict_natural> ws* @s0 natural @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_NATURAL, 1; }
<edict_restrict_text>
ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_RESTRICT_TEXT, 1; }
<edict_end_text>
ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws*
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_END_TEXT, 1; }
<edict, edict_end> "]" => expect_line
{ if(s->edict.size) return 0; goto scan; }
*/
}
#endif

1
src/scan_kjv.h Normal file
View File

@ -0,0 +1 @@

100
src/scan_kjv.re.c Normal file
View File

@ -0,0 +1,100 @@
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Scan journal entries for kjv references. */
#include "../src/journal.h"
#include "../src/kjv.h"
#include "../src/helper.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
/*!conditions:re2c*/
/** Scans one journal entry, `buffer`, line by line for KJV citations of the
 form `<Book> <chapter>:<verse>[ab][-...] --` that start a line. Each citation
 found is converted to a `kjvcite` and printed to `stdout`; every other line
 is skipped. `date` is only used to label the diagnostic.
 @param[date] Date of the entry; printed in the error message.
 @param[buffer] Null-terminated text of the entry.
 @return 1 when the terminating null is reached at the start of a line; 0
 after printing `<date> line <n>: unexpected.` to `stderr` when the input
 does not fit the grammar (for instance, a last line that is not terminated
 by a newline) or when `helper_natural` rejects a chapter or verse number. */
static int scan(union date32 date, const char *const buffer) {
	/* Cursor, backtrack marker and tag variables that the generated lexer
	 reads and writes; `s0`..`t1` receive the `@` tags in the rules below. */
	const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, *s0, *s1, *t0, *t1;
	enum kjv_book book = Revelation;
	uint32_t chapter, verse;
	enum YYCONDTYPE condition = yycline; /* Start at the beginning of a line. */
	size_t line = 1; /* 1-based; only used in diagnostics. */
	YYCURSOR = YYMARKER = yyt1 = buffer;
	/*!re2c /**/
		re2c:define:YYCTYPE = char;
		re2c:yyfill:enable = 0;
		re2c:define:YYGETCONDITION = "condition";
		re2c:define:YYSETCONDITION = "condition = @@;";
		re2c:define:YYGETCONDITION:naked = 1;
		re2c:define:YYSETCONDITION:naked = 1;
		unix_control = [\x01-\x08\x0a-\x1f\x7f];
		ws = [ \t];
		glyph = [^] \ ("\x00" | "\n" | unix_control | ws);
		natural = [1-9][0-9]*;
		lookat = ws* natural ":" natural [ab]?
			("-" (natural ":")? natural [ab]?)? ws* "--";
	*/
	for( ; ; ) { /*!re2c /**/
		/* Anything that no rule in these conditions accepts is an error. */
		<skip, book> * { printf("*! %zu\n", line);goto error; }
		/* End of the entry, seen at the start of a line: success. */
		<line> "\x00" { printf("yes!\n");return 1; }
		/* A blank line has to be consumed here. Otherwise the default rule
		 below eats the newline itself and <skip> then swallows the whole of
		 the following line, hiding any citation on it and leaving the line
		 count one behind. */
		<line> "\n" { line++; continue; }
		/* Any other line is not a citation; discard the rest of it. */
		<line> * :=> skip
		/* The book name only counts when the trailing context is a
		 reference followed by "--"; the reference itself is read in <book>. */
		<line> "Genesis" / lookat => book { book = Genesis; continue; }
		<line> "Exodus" / lookat => book { book = Exodus; continue; }
		/*| "Leviticus" | "Numbers" | "Deuteronomy"
		| "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings"
		| "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job"
		| "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah"
		| "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel"
		| "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk"
		| "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark"
		| "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians"
		| "Galatians" | "Ephesians" | "Philippians" | "Colossians"
		| "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon"
		| "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude"
		| "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``" */
		//<line> [^\n\x00]* newline { printf("throw\n"); line++; continue; }
		/* [s0, s1) is the chapter and [t0, t1) is the verse; whatever
		 follows on the line is thrown away by <skip>. */
		<book> ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? => skip {
			if(!helper_natural(s0, s1, &chapter)
				|| !helper_natural(t0, t1, &verse)) goto error;
			union kjvcite c
				= { .book = book, .chapter = chapter, .verse = verse };
			char a[12];
			kjvcite_to_string(c, &a), printf("Parsed %s\n", a);
			continue;
		}
		/* Rest of the current line, up to and including its newline. */
		<skip> [^\n\x00]* "\n" => line { line++; continue; }
		//=> bible { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; }
	*/ }
error:
	{
		char a[12];
		date32_to_string(date, &a);
		fprintf(stderr, "%s line %zu: unexpected.\n", a, line);
	}
	return 0;
}
int main(void) {
struct journal j = journal();
int success = EXIT_SUCCESS;
struct journal_iterator it;
union date32 k;
union load *v;
if(!journal_is_valid(&j)) goto catch;
printf("Journal: %s.\n", journal_to_string(&j));
if(!scan((union date32){ .u32 = 42 }, "Genesis 1:1 -- fdgYo.\n"
"Exodus 1:2 -- fuck\n"
"no\n")) fprintf(stderr, "Error :[\n");
it = journal_begin(&j); while(journal_next(&it, &k, &v)) {
char a[12];
date32_to_string(k, &a), printf("%s: %s\n", a, v->text);
}
goto finally;
catch:
success = EXIT_FAILURE;
perror("journal");
finally:
journal_(&j);
return success;
}