From 940d3052587ea641072f2915cb240bdc9fd3909d Mon Sep 17 00:00:00 2001 From: Neil Date: Wed, 28 Dec 2022 23:54:59 -0800 Subject: [PATCH] transferred from offsets to pointers --- Makefile | 12 +- src/helper.h | 16 ++ src/journal.h | 13 ++ src/journal.re.c | 66 +++--- src/kjv.h | 1 + src/kjv.re.c | 32 +-- src/not-used.c | 550 ++++++++++++++++++++++++++++++++++++++++++++++ src/scan.h | 74 ------- src/scan.re.c | 228 ------------------- src/scan_kjv.h | 1 + src/scan_kjv.re.c | 100 +++++++++ 11 files changed, 727 insertions(+), 366 deletions(-) create mode 100644 src/helper.h create mode 100644 src/not-used.c delete mode 100644 src/scan.h delete mode 100644 src/scan.re.c create mode 100644 src/scan_kjv.h create mode 100644 src/scan_kjv.re.c diff --git a/Makefile b/Makefile index f315391..4ff5afd 100644 --- a/Makefile +++ b/Makefile @@ -29,16 +29,16 @@ else CF += -g endif -projects := bin/test-text bin/test-kjv bin/test-journal bin/interpret +projects := bin/test-text bin/test-kjv bin/test-journal bin/kjv #docs := $(patsubst test/test_%.c, doc/%.md, $(wildcard test/test_*.c)) default: $(projects) # success -bin/test-text: build/test_text.o build/text.o -bin/test-kjv: build/test_kjv.o build/text.o build/kjv.o -bin/test-journal: build/test_journal.o build/text.o build/journal.o -bin/interpret: build/interpret.o build/scan.o build/journal.o build/kjv.o build/text.o +bin/test-text: build/text.o build/test_text.o +bin/test-kjv: build/text.o build/kjv.o build/test_kjv.o +bin/test-journal: build/text.o build/journal.o build/test_journal.o +bin/kjv: build/text.o build/journal.o build/kjv.o build/scan_kjv.o bin/%: @echo "\033[1;36mlinking $@\033[0m" @@ -68,7 +68,7 @@ build/%.c: src/%.re.c # # https://github.com/neil-edelman/cdoc documentation # -cdoc -o $@ $< -.SECONDARY: build/kjv.c build/journal.c build/scan.c +.SECONDARY: build/kjv.c build/journal.c build/scan_kjv.c .PHONY: clean release test test: $(projects) diff --git a/src/helper.h b/src/helper.h new file mode 100644 index 
0000000..6601e2d --- /dev/null +++ b/src/helper.h @@ -0,0 +1,16 @@ +#include +#include + +/** Parse unsigned; [`s`,`e`) => `n`. */ +static int helper_natural(const char *s, const char *const e, uint32_t *const n) +{ + uint32_t accum = 0; + while(s < e) { + unsigned next = accum * 10 + (unsigned)(*s - '0'); + if(accum >= next) return errno = ERANGE, 0; + accum = next; + s++; + } + *n = accum; + return 1; +} diff --git a/src/journal.h b/src/journal.h index cbd2526..9165eab 100644 --- a/src/journal.h +++ b/src/journal.h @@ -1,4 +1,6 @@ #ifndef OMIT_BASE /* */ @@ -15,6 +18,11 @@ union date32 { struct tree_day_node; struct tree_day_tree { struct tree_day_node *node; unsigned height; }; struct day_tree { struct tree_day_tree root; }; +struct tree_day_ref { struct tree_day_node *node; unsigned height, idx; }; +struct tree_day_iterator { + struct tree_day_tree *root; struct tree_day_ref ref; int seen; +}; +struct day_tree_iterator { struct tree_day_iterator _; }; #else /* page --> */ @@ -23,10 +31,15 @@ struct day_tree { struct tree_day_tree root; }; #ifndef OMIT_PROTO /* */ diff --git a/src/journal.re.c b/src/journal.re.c index d48bb3f..8f77deb 100644 --- a/src/journal.re.c +++ b/src/journal.re.c @@ -12,8 +12,7 @@ #include /* opendir readdir closedir */ -union load { const char *text; size_t offset; }; -static void date32_to_string(const union date32 d, char (*const a)[12]) { +void date32_to_string(const union date32 d, char (*const a)[12]) { sprintf(*a, "%" PRIu32 "-%2.2" PRIu32 "-%2.2" PRIu32, d.year % 10000, d.month % 100, d.day % 100); } @@ -101,39 +100,6 @@ static unsigned looks_like_day(const char *const a) { */ } -#if 0 -/** Is `y` a leap-year? */ -static int leap(int y) { - assert(y >= 1582); - if(!(y % 400)) return 1; - if(!(y % 100)) return 0; - if(!(y % 4)) return 1; - return 0; -} -/** @return Pack into `date32` or return zero. 
*/ -static union date32 date_to_32(const int y, const int m, const int d) { - union date32 d32 = { 0 }; - /* Leap year calculations only work at y>=1 and Gregorian Calendar and max - 23 bits. */ - if(y < 1582 || y > 8388607 || m < 1 || m > 12 || d < 1 || d > 31) goto no; - switch(m) { - case 1: case 3: case 5: case 7: case 8: case 10: case 12: break; - case 4: case 6: case 9: case 11: if(d > 30) goto no; break; - case 2: if(d > 28 + leap(y)) goto no; break; - default: assert(0); break; - } - d32.year = (unsigned)y, d32.month = (unsigned)m, d32.day = (unsigned)d; -no: - return d32; -} -/** Tomohiko Sakamoto comp.lang.c 1993-04-10. */ -static unsigned weekday(union date32 d) { - d.year -= d.month < 3; - return (d.year + d.year / 4 - d.year / 100 + d.year / 400 - + "-bed=pen+mad."[d.month] + d.day) % 7; -} -#endif - #define OMIT_BASE #define OMIT_DAY @@ -157,6 +123,8 @@ struct journal journal(void) { struct int_array years = int_array(), months = int_array(), days = int_array(); int *y = 0, *y_end, *m = 0, *m_end, *d = 0, *d_end; + struct day_tree_iterator it; + union load *v; /* Get the years list as directories matching a year. */ if(chdir(dir_journal) == -1 || !(dir = opendir("."))) goto catch; @@ -236,8 +204,13 @@ struct journal journal(void) { if(chdir("..") == -1) goto catch; /*if(*y == 1993) break;*/ } - day_tree_bulk_finish(&j.days); - if(chdir("..") == -1) goto catch; + if(chdir("..") == -1 || !day_tree_bulk_finish(&j.days)) goto catch; + /* Structure is now stable. */ + it = day_tree_begin(&j.days); while(day_tree_next(&it, 0, &v)) { + /*printf("%zu\n", v->offset);*/ + v->text = j.backing.a.data + v->offset; + /*printf("%.60s\n", v->text);*/ + } /*fprintf(stderr, "Journal has entries: %s\n", day_tree_to_string(&j.days));*/ goto finally; @@ -262,3 +235,22 @@ int journal_is_valid(const struct journal *const j) { /** @return `j` as a string. 
*/ const char *journal_to_string(const struct journal *const j) { return day_tree_to_string(&j->days); } + +struct journal_iterator journal_begin(struct journal *const j) { + struct journal_iterator it; + it._ = day_tree_begin(&j->days); + return it; +} + +struct journal_iterator journal_begin_at(struct journal *const j, + const union date32 x) { + struct journal_iterator it; + it._ = day_tree_begin_at(&j->days, x); + return it; +} + +int journal_next(struct journal_iterator *const it, + union date32 *const k, union load **v) { + return day_tree_next(&it->_, k, v); +} + diff --git a/src/kjv.h b/src/kjv.h index 0326345..f1a377e 100644 --- a/src/kjv.h +++ b/src/kjv.h @@ -80,6 +80,7 @@ union kjvcite { struct { uint32_t verse : 12, chapter : 13, book : 7; }; /* C11, reverse */ uint32_t u32; }; +void kjvcite_to_string(const union kjvcite, char (*)[12]); #else /* base --> */ diff --git a/src/kjv.re.c b/src/kjv.re.c index 848fe38..0b38c4e 100644 --- a/src/kjv.re.c +++ b/src/kjv.re.c @@ -11,16 +11,21 @@ #define OMIT_VERSES #define OMIT_PROTO #include "../src/kjv.h" /* Just the base data. */ +#include "../src/helper.h" #include #include #include -#include #include #include /* opendir readdir closedir */ #include /* chdir (POSIX) (because I'm lazy) */ /* #include No; overkill. */ +void kjvcite_to_string(const union kjvcite x, char (*const a)[12]) + { sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, + kjv_book_string[x.book < KJV_BOOK_SIZE ? x.book : KJV_BOOK_SIZE], + x.chapter % 1000, x.verse % 1000); } + /* Reversible hash map. */ /** on `x`. 
*/ @@ -46,8 +51,7 @@ static uint32_t kjvset_hash(const union kjvcite x) { return lowbias32(x.u32); } static union kjvcite kjvset_unhash(const uint32_t x) { union kjvcite k; k.u32 = lowbias32_r(x); return k; } static void kjvset_to_string(const union kjvcite x, char (*const a)[12]) - { sprintf(*a, "%.4s%" PRIu32 ":%" PRIu32, kjv_book_string[x.book], - x.chapter % 1000, x.verse % 1000); } + { kjvcite_to_string(x, a); } #define TABLE_NAME kjvset #define TABLE_KEY union kjvcite #define TABLE_UINT uint32_t @@ -70,20 +74,6 @@ static void verse_to_string(const union kjvcite x, const unsigned count, #include "../src/table.h" -/** Helper to parse unsigned; [`s`,`e`) => `n`. */ -static int parse_natural(const char *s, const char *const e, unsigned *const n) { - unsigned accum = 0; - while(s < e) { - unsigned next = accum * 10 + (unsigned)(*s - '0'); - if(accum >= next) return errno = ERANGE, 0; - accum = next; - s++; - } - *n = accum; - return 1; -} - - /* Parse filename of books. */ /*!re2c /**/ re2c:yyfill:enable = 0; @@ -101,7 +91,7 @@ static int looks_like_book_fn(const char *fn, unsigned *const book_no) { * { return 0; } @s0 natural @s1 [^.\x00]* ".txt" "\x00" - { return parse_natural(s0, s1, book_no); } + { return helper_natural(s0, s1, book_no); } */ } @@ -111,7 +101,7 @@ struct lex { size_t line; const char *cursor; int error; - unsigned chapter, verse, words; + uint32_t chapter, verse, words; }; static struct lex lex(const char *cursor) { struct lex lex; @@ -140,8 +130,8 @@ scan: [^[\]\n\x00]* "\n" { lex->line++; goto scan; } "\x00" { return 0; } "[" @s0 natural @s1 ":" @t0 natural @t1 "]" => verse { - if(!parse_natural(s0, s1, &lex->chapter) - || !parse_natural(t0, t1, &lex->verse)) + if(!helper_natural(s0, s1, &lex->chapter) + || !helper_natural(t0, t1, &lex->verse)) return errno = EILSEQ, lex->error = 1, 0; lex->words = 0; /*printf("%u:%u", lex->chapter, lex->verse);*/ diff --git a/src/not-used.c b/src/not-used.c new file mode 100644 index 0000000..1913d9c --- 
/dev/null +++ b/src/not-used.c @@ -0,0 +1,550 @@ +/** Is `y` a leap-year? */ +static int leap(int y) { + assert(y >= 1582); + if(!(y % 400)) return 1; + if(!(y % 100)) return 0; + if(!(y % 4)) return 1; + return 0; +} +/** Convert or narrower type or return zero. */ +static union date32 date_to_32(const int y, const int m, const int d) { + union date32 d32 = { 0 }; + /* Leap year calculations only work at y>=1 and Gregorian Calendar and max + 23 bits. */ + if(y < 1582 || y > 8388607 || m < 1 || m > 12 || d < 1 || d > 31) goto no; + switch(m) { + case 1: case 3: case 5: case 7: case 8: case 10: case 12: break; + case 4: case 6: case 9: case 11: if(d > 30) goto no; break; + case 2: if(d > 28 + leap(y)) goto no; break; + default: assert(0); break; + } + d32.year = (unsigned)y, d32.month = (unsigned)m, d32.day = (unsigned)d; +no: + return d32; +} +/** Tomohiko Sakamoto comp.lang.c 1993-04-10. */ +static unsigned weekday(union date32 d) { + d.year -= d.month < 3; + return (d.year + d.year / 4 - d.year / 100 + d.year / 400 + + "-bed=pen+mad."[d.month] + d.day) % 7; +} +/* +### plot with steps +reset session + +$Data <meaning); + (lex = lex_array_next(&l_it)); ) { + switch(lex->symbol) { + case KJV_BOOK: + if(state != CHILL && state != WORD) goto catch; + if(state == WORD) printf("\n"); + fprintf(stderr, "%d-%.2d-%.2d: \"%.*s\", ", + entry.key->year, entry.key->month, entry.key->day, + (int)(lex->s1 - lex->s0), lex->s0); + state = BOOK; + break; + case KJV_CHAPTER_VERSE: + if(state != BOOK) goto catch; + printf("\"%.*s\", \"", (int)(lex->s1 - lex->s0), lex->s0); + state = CHAPTER; + break; + case KJV_NEXT: + if(state != WORD) goto catch; + printf("\"\n"); + break; + case KJV_TEXT: + if(state != WORD && state != CHAPTER && state != NEXT) + goto catch; + printf("%s%.*s<%d>", state == WORD ? "*" : "", + (int)(lex->s1 - lex->s0 < 0 ? 
10 : lex->s1 - lex->s0), + lex->s0, (int)(lex->s1 - lex->s0)); + count++; + state = WORD; + break; + default: + if(state != CHILL && state != WORD) goto catch; + if(state == WORD) printf("\"\n"), state = CHILL; + break; + } + } + if(state != CHILL && state != WORD) goto catch; + if(state == WORD) printf("\n"), state = CHILL; + } + printf("Count: %lu.\n", (unsigned long)count); + return 1; +catch: + fprintf(stderr, "Bible error.\n"); + if(entry.key) { + fprintf(stderr, "On date: %d-%.2d-%.2d.\n", + entry.key->year, entry.key->month, entry.key->day); + if(lex) fprintf(stderr, "At line %lu.\n", (unsigned long)lex->line); + } + errno = EILSEQ; + return 0; +} + +#define C_BLACK "\033[0;30m" +#define C_RED "\033[0;31m" +#define C_GREEN "\033[0;32m" +#define C_YELLOW "\033[0;33m" +#define C_BLUE "\033[0;34m" +#define C_PURPLE "\033[0;35m" +#define C_CYAN "\033[0;36m" +#define C_WHITE "\033[0;37m" + +#define CB_BLACK "\033[1;30m" +#define CB_RED "\033[1;31m" +#define CB_GREEN "\033[1;32m" +#define CB_YELLOW "\033[1;33m" +#define CB_BLUE "\033[1;34m" +#define CB_PURPLE "\033[1;35m" +#define CB_CYAN "\033[1;36m" +#define CB_WHITE "\033[1;37m" + +#define C_RESET "\033[0m" + +int main(int argc, char **argv) { + int success = EXIT_SUCCESS; + char *intent = 0; + + /* For reading in files, overwritten. */ + DIR *dir = 0; + struct dirent *de; + struct int_array years = int_array(), months = int_array(), + days = int_array(); + int *y, *y_end; + + struct page_tree journal = page_tree(); + + errno = 0; + if(argc != 2) { intent = "needs journal location, which should" + " contain //.txt"; goto catch; } + + /* Get the years list as directories matching a year. 
*/ + if(chdir(argv[1]) == -1 || !(dir = opendir("."))) goto catch; + while((de = readdir(dir))) { + struct stat st; + int year, *p; + if(!lex_looks_like_year(de->d_name, &year)) continue; + if(stat(de->d_name, &st)) goto catch; + if(!S_ISDIR(st.st_mode)) continue; + if(!(p = int_array_new(&years))) goto catch; + *p = year; + } + closedir(dir), dir = 0; + /* Sort the years for sensible ordering of parsing. */ + qsort(years.data, years.size, sizeof *years.data, &void_int_cmp); + fprintf(stderr, "Years in <<%s>>: %s.\n", + argv[1], int_array_to_string(&years)); + + /* Go though each year. */ + for(y = years.data, y_end = y + years.size; y < y_end; y++) { + char fn[64]; + int *m, *m_end; + sprintf(fn, "%d", *y); + + /* Get the months as directories. */ + if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch; + while((de = readdir(dir))) { + struct stat st; + int month, *p; + if(!(month = lex_looks_like_month(de->d_name))) continue; + if(stat(de->d_name, &st)) goto catch; + if(!S_ISDIR(st.st_mode)) continue; + if(!(p = int_array_new(&months))) goto catch; + *p = month; + } + closedir(dir), dir = 0; + qsort(months.data, months.size, sizeof *months.data, &void_int_cmp); + fprintf(stderr, "Months in <<%s>>: %s.)\n", + fn, int_array_to_string(&months)); + + /* Go though each month. */ + for(m = months.data, m_end = m + months.size; m < m_end; m++) { + int *d, *d_end; + sprintf(fn, "%.2d", *m); + + /* Get the days as files. */ + if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch; + while((de = readdir(dir))) { + struct stat st; + int day, *p; + /* fixme: Have yyyy-mm-dd to figure out how many days. 
*/ + if(!(day = lex_looks_like_day(de->d_name))) continue; + if(stat(de->d_name, &st)) goto catch; + if(S_ISDIR(st.st_mode)) continue; + if(!(p = int_array_new(&days))) goto catch; + *p = day; + } + closedir(dir), dir = 0; + qsort(days.data, days.size, sizeof *days.data, &void_int_cmp); + fprintf(stderr, "Days in <<%s>>: %s.\n", + fn, int_array_to_string(&days)); + + for(d = days.data, d_end = d + days.size; d < d_end; d++) { + struct lex *lex = 0; + struct page *page = 0; + union date32 d32; + if(!(d32 = date_to_32(*y, *m, *d)).year) { errno = EILSEQ; + intent = "date parse error"; goto syntax; } + sprintf(fn, "%.2d.txt", *d); + if(page_tree_bulk_add(&journal, d32, &page) != TREE_UNIQUE) { + if(!errno) intent = "not unique", errno = EDOM; + goto syntax; + } + page->entry = char_array(); + page->meaning = lex_array(); + if(!append_file(&page->entry, fn)) goto syntax; + int first = 1; + for(lex_reset(page->entry.data); ; ) { + if(!(lex = lex_array_new(&page->meaning))) goto syntax; + if(!lex_next(lex)) { + if(lex->symbol != END) { errno = EILSEQ; goto syntax; } + break; /* Terminated successfully. */ + } + switch(lex->symbol) { + case TEXT: printf("%s%.*s", + first ? "" : " ", (int)(lex->s1 - lex->s0), lex->s0); + first = 0; break; + case PARAGRAPH: printf("\n" C_RESET); break; + case KJV_BOOK: printf(C_YELLOW "%.*s", + (int)(lex->s1 - lex->s0), lex->s0); break; + case KJV_CHAPTER_VERSE: printf(" ch. 
%.*s", + (int)(lex->s1 - lex->s0), lex->s0); break; + case KJV_TEXT: printf("%.*s", + (int)(lex->s1 - lex->s0), lex->s0); break; + case KJV_NEXT: printf("(next)\n"); break; + default: + fprintf(stderr, "%lu: %s", + (unsigned long)lex->line, lex_symbols[lex->symbol]); + if(lex->s0 && lex->s1) { + if(lex->s0 + INT_MAX < lex->s1) + intent = "line too long", errno = EILSEQ; + else + fprintf(stderr, " <<%.*s>>", + (int)(lex->s1 - lex->s0), lex->s0); + } + fprintf(stderr, ".\n"); + break; + } + } + continue; +syntax: + fprintf(stderr, "On date: %d-%.2d-%.2d.\n", *y, *m, *d); + if(!page) goto catch; + if(!lex) { fprintf(stderr, "While parsing <<<\n%s>>>.\n", + page->entry.data); goto catch; } + for(struct lex_array_iterator it + = lex_array_iterator(&page->meaning); + lex = lex_array_next(&it); ) { + fprintf(stderr, "%lu: %s", + (unsigned long)lex->line, lex_symbols[lex->symbol]); + if(lex->s0 && lex->s1) { + if(lex->s0 + INT_MAX < lex->s1) + intent = "line too long", errno = EILSEQ; + else + fprintf(stderr, " <<%.*s>>", + (int)(lex->s1 - lex->s0), lex->s0); + } + fprintf(stderr, ".\n"); + } + goto catch; + } + + int_array_clear(&days); + if(chdir("..") == -1) goto catch; + } + + int_array_clear(&months); + if(chdir("..") == -1) goto catch; + /* fixme: Expand, contact is the next thing that it doesn't get. */ + if(*y == 1993/*1996*/) break; + } + page_tree_bulk_finish(&journal); + int_array_(&years), int_array_(&months), int_array_(&days); + fprintf(stderr, "Journal has entries: %s\n", page_tree_to_string(&journal)); + + /* Do something interesting? 
*/ + if(!bible_graph(&journal)) goto catch; + + goto finally; +catch: + success = EXIT_FAILURE; + perror("interpret"); + if(intent) fprintf(stderr, "Further explanation: %s.\n", intent); +finally: + if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir"); + int_array_(&years), int_array_(&months), int_array_(&days); + struct page_tree_entry entry; + for(struct page_tree_iterator it = page_tree_begin(&journal); + (entry = page_tree_next(&it)).key; ) { + struct page *const page = entry.value; + char z[12]; + date32_to_string(*entry.key, &z); + lex_array_(&page->meaning); + char_array_(&page->entry); + } + return success; +} + +/* Popular KJV 738137 words. */ + +struct scan scan(const char *const buffer) { + struct scan scan; + scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer; + scan.condition = yycline; + scan.line = 1; + return scan; +} + +int scan_next(struct scan *const s, struct lex *const x) { + /*!re2c /**/ + re2c:flags:tags = 1; + re2c:define:YYCTYPE = char; + re2c:yyfill:enable = 0; + re2c:define:YYCURSOR = s->cursor; + re2c:define:YYMARKER = s->marker; + re2c:define:YYCONDTYPE = 'condition'; + re2c:define:YYGETCONDITION = 's->condition'; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION = 's->condition = @@;'; + re2c:define:YYSETCONDITION:naked = 1; + sentinel = "\x00"; + newline = "\n"; + unix_control = [\x01-\x08\x0a-\x1f\x7f]; + ws = [ \t]; + glyph = [^] \ (sentinel | unix_control | newline | ws); + keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*; + decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; + natural = [1-9][0-9]*; + date = "-"? 
natural "-" [0-1][0-9] "-" [0-1][0-9]; + */ + const char *s0, *s1; + /*!stags:re2c format = 'const char *@@;\n'; */ + assert(s && x); + if(!s->buffer) return 0; + x->line = s->line; + x->s0 = x->s1 = 0; +scan: + /*!re2c /**/ + <*> unix_control { return x->symbol = ILLEGAL, 0; } + <*> * { return x->symbol = SYNTAX, 0; } + <*> sentinel /* New line always delimits. */ + { return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; } + newline => line { x->line = ++s->line; goto scan; } + /* Symbols that go at the beginning of a line. */ + newline { x->line = ++s->line; goto scan; } + "[" :=> edict + "--" :=> source + "->" :=> location + "!" => text { return x->symbol = COMPLETE, 1; } + "^" => text { return x->symbol = CANCELLED, 1; } + "#" => text { return x->symbol = HEADING, 1; } + * :=> text + + newline => line { x->line = ++s->line; goto scan; } + ws+ { goto scan; } + @s0 glyph+ @s1 + { x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; } + + + @s0 keyword @s1 => expect_line + { x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; } + + "" / "(" :=> map + "[" ws* @s0 keyword @s1 ws* "]" + { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; } + @s0 keyword @s1 => expect_line + { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; } + + "(" @s0 decimal "," @s1 decimal ")" => expect_caption + { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; } + + "source" :=> source + "ed" :=> ed + "contact" :=> contact + "glider" :=> glider + "flight" :=> flight + "bible" :=> bible + "book" :=> book + "movie" :=> movie + "tv" :=> tv + "medication" :=> medication + "idea" :=> idea + "vaccine" :=> vaccine + "in" :=> in + "" / natural :=> significant + [0-1][0-9] "-" [0-3][0-9] + ", " [0-2][0-9] ":" [0-5][0-9] "] " + :=> text /* This is likely WhatsApp conversations. Ignore. */ + /* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */ + + /* How did it get into my journal? 
*/ + "source" + { if(s->is_ws_expected || s->edict.size) + return x->symbol = SYNTAX, 0; + s->is_ws_expected = 1, s->is_source = 1; + s->edict.size = 2; + s->edict.expect[1] = EXPECT_KEYWORD; + s->edict.expect[0] = EXPECT_END_TEXT; + return x->symbol = SOURCE, 1; } + "default" + { if(s->is_ws_expected || !s->is_source) + return x->symbol = SYNTAX, 0; + s->is_ws_expected = 1, s->is_source = 0; + return x->symbol = DEFAULT, 1; } + + /* Editorializing; looking back. */ + "ed" + { if(s->is_ws_expected || s->edict.size) + return x->symbol = SYNTAX, 0; + s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */ + s->edict.size = 1; + s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */ + return x->symbol = EDITORIALIZING, 1; } + + /* Score. */ + "significant" + { if(s->is_ws_expected || s->edict.size) + return x->symbol = SYNTAX, 0; + s->is_ws_expected = 1; + s->edict.size = 3; + s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */ + s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */ + s->edict.expect[0] = EXPECT_DATE; /* Birthday. */ + return x->symbol = SIGNIFICANT, 1; } + @s0 natural @s1 + { if(s->is_ws_expected || s->edict.size) + return x->symbol = SYNTAX, 0; + s->is_ws_expected = 1; + x->s0 = s0, x->s1 = s1; + return x->symbol = SIGNIFICANT_RECALL, 1; } + + /* General [edict: whatever]. */ + ws+ { s->is_ws_expected = 0; goto scan; } + ":" + { if(!s->edict.size) return x->symbol = SYNTAX, 0; + s->is_ws_expected = 0, s->is_source = 0; + expect_pop(); goto scan; } + ws* @s0 keyword @s1 ws* ";"? + { x->s0 = s0, x->s1 = s1; expect_pop(); + return x->symbol = ARG_KEYWORD, 1; } + ws* @s0 date @s1 ws* ";"? + { x->s0 = s0, x->s1 = s1; expect_pop(); + return x->symbol = ARG_DATE, 1; } + ws* @s0 natural @s1 ws* ";"? + { x->s0 = s0, x->s1 = s1; expect_pop(); + return x->symbol = ARG_NATURAL, 1; } + + ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"? 
+ { x->s0 = s0, x->s1 = s1; expect_pop(); + return x->symbol = ARG_RESTRICT_TEXT, 1; } + + ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws* + { x->s0 = s0, x->s1 = s1; expect_pop(); + return x->symbol = ARG_END_TEXT, 1; } + "]" => expect_line + { if(s->edict.size) return 0; goto scan; } + */ +} + +#define FOR_SYMBOL(X) \ + \ + /* Results. */ \ + X(END, 0), \ + X(SYNTAX, 0), \ + X(ILLEGAL, 0), \ + X(NOT_FOUND, 0), \ + \ + X(ORDERED_LIST_ITEM, &no_vt), \ + X(LIST_ITEM, 0), \ + X(COMPLETE, 0), \ + X(CANCELLED, 0), \ + X(HEADING, 0), \ + /* Text. */ \ + X(PARAGRAPH, 0), \ + X(TEXT, &word_vt), \ + /*This is lazy.*/X(CAPTION, &word_vt), \ + \ + /* Edicts. */ \ + X(SOURCE, &word_vt), \ + X(DEFAULT, 0), \ + X(SOURCE_RECALL, &word_vt), \ + X(LOCATION, 0), \ + X(LOCATION_SAVE, 0), \ + X(LOCATION_RECALL, 0), \ + X(SIGNIFICANT, 0), \ + X(SIGNIFICANT_RECALL, 0), \ + X(EDITORIALIZING, 0), \ + \ + /* Arguments. */ \ + X(ARG_KEYWORD, 0), \ + X(ARG_DATE, &date_vt), \ + X(ARG_NATURAL, &no_vt), \ + X(ARG_RESTRICT_TEXT, &word_vt), \ + X(ARG_END_TEXT, &word_vt), \ + \ + /* Bible */ \ + X(KJV_BOOK, &kjv_book_vt), \ + X(KJV_CHAPTER_VERSE, &word_vt), \ + X(KJV_TEXT, &word_vt), \ + X(KJV_NEXT, 0) + +int scan(union date32 date, const char *const buffer); + +struct scan { + const char *marker, *from, *cursor, *limit, *label, *buffer; + int condition; + size_t line; + int is_ws_expected; +}; + +struct lex { + size_t line; +#define ARG1(n, m) n + enum lex_symbol { FOR_SYMBOL(ARG1) } symbol; +#undef ARG1 + const char *s0, *s1; +}; +#define STR1(n, m) #n +static const char *const lex_symbols[] = { FOR_SYMBOL(STR1) }; +#undef X + +struct scan scan(const char *); +int scan_next(struct scan *const s, struct lex *const x); diff --git a/src/scan.h b/src/scan.h deleted file mode 100644 index 06ebde9..0000000 --- a/src/scan.h +++ /dev/null @@ -1,74 +0,0 @@ -#include "journal.h" - -int lex_looks_like_year(const char *, int *); -int lex_looks_like_month(const char *); -int lex_looks_like_day(const 
char *); - -#define FOR_SYMBOL(X) \ - \ - /* Results. */ \ - X(END, 0), \ - X(SYNTAX, 0), \ - X(ILLEGAL, 0), \ - X(NOT_FOUND, 0), \ - \ - X(ORDERED_LIST_ITEM, &no_vt), \ - X(LIST_ITEM, 0), \ - X(COMPLETE, 0), \ - X(CANCELLED, 0), \ - X(HEADING, 0), \ - /* Text. */ \ - X(PARAGRAPH, 0), \ - X(TEXT, &word_vt), \ - /*This is lazy.*/X(CAPTION, &word_vt), \ - \ - /* Edicts. */ \ - X(SOURCE, &word_vt), \ - X(DEFAULT, 0), \ - X(SOURCE_RECALL, &word_vt), \ - X(LOCATION, 0), \ - X(LOCATION_SAVE, 0), \ - X(LOCATION_RECALL, 0), \ - X(SIGNIFICANT, 0), \ - X(SIGNIFICANT_RECALL, 0), \ - X(EDITORIALIZING, 0), \ - \ - /* Arguments. */ \ - X(ARG_KEYWORD, 0), \ - X(ARG_DATE, &date_vt), \ - X(ARG_NATURAL, &no_vt), \ - X(ARG_RESTRICT_TEXT, &word_vt), \ - X(ARG_END_TEXT, &word_vt), \ - \ - /* Bible */ \ - X(KJV_BOOK, &kjv_book_vt), \ - X(KJV_CHAPTER_VERSE, &word_vt), \ - X(KJV_TEXT, &word_vt), \ - X(KJV_NEXT, 0) - -int scan(union date32 date, const char *const buffer); - -#if 0 - -struct scan { - const char *marker, *from, *cursor, *limit, *label, *buffer; - int condition; - size_t line; - int is_ws_expected; -}; - -struct lex { - size_t line; -#define ARG1(n, m) n - enum lex_symbol { FOR_SYMBOL(ARG1) } symbol; -#undef ARG1 - const char *s0, *s1; -}; -#define STR1(n, m) #n -static const char *const lex_symbols[] = { FOR_SYMBOL(STR1) }; -#undef X - -struct scan scan(const char *); -int scan_next(struct scan *const s, struct lex *const x); - -#endif diff --git a/src/scan.re.c b/src/scan.re.c deleted file mode 100644 index 56a6e35..0000000 --- a/src/scan.re.c +++ /dev/null @@ -1,228 +0,0 @@ -/** @license 2022 Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). - - Lexer for journal entries. - - "^[edict: expect; there; to; be; args] Something.\n" - "^[] Something.\n" - "+.{2}()$" - " -- \"*\n" - - @std C89/90 */ - -#include "../src/scan.h" -#include -#include -#include -#include -#include - -/* This defines `enum condition`. 
*/ -/*!types:re2c*/ - -int scan(union date32 date, const char *const buffer) { - const char *YYCURSOR = buffer; - /*!re2c /**/ - re2c:define:YYCTYPE = char; - re2c:yyfill:enable = 0; - */ -} - - - - - - - - -#if 0 - -struct scan scan(const char *const buffer) { - struct scan scan; - scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer; - scan.condition = yycline; - scan.line = 1; - return scan; -} - -int scan_next(struct scan *const s, struct lex *const x) { - /*!re2c /**/ - re2c:flags:tags = 1; - re2c:define:YYCTYPE = char; - re2c:yyfill:enable = 0; - re2c:define:YYCURSOR = s->cursor; - re2c:define:YYMARKER = s->marker; - re2c:define:YYCONDTYPE = 'condition'; - re2c:define:YYGETCONDITION = 's->condition'; - re2c:define:YYGETCONDITION:naked = 1; - re2c:define:YYSETCONDITION = 's->condition = @@;'; - re2c:define:YYSETCONDITION:naked = 1; - sentinel = "\x00"; - newline = "\n"; - unix_control = [\x01-\x08\x0a-\x1f\x7f]; - ws = [ \t]; - glyph = [^] \ (sentinel | unix_control | newline | ws); - keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*; - decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; - natural = [1-9][0-9]*; - date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9]; - */ - const char *s0, *s1; - /*!stags:re2c format = 'const char *@@;\n'; */ - assert(s && x); - if(!s->buffer) return 0; - x->line = s->line; - x->s0 = x->s1 = 0; -scan: - /*!re2c /**/ - <*> unix_control { return x->symbol = ILLEGAL, 0; } - <*> * { return x->symbol = SYNTAX, 0; } - <*> sentinel /* New line always delimits. */ - { return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; } - newline => line { x->line = ++s->line; goto scan; } - /* Symbols that go at the beginning of a line. */ - newline { x->line = ++s->line; goto scan; } - "[" :=> edict - "--" :=> source - "->" :=> location - "!" 
=> text { return x->symbol = COMPLETE, 1; } - "^" => text { return x->symbol = CANCELLED, 1; } - "#" => text { return x->symbol = HEADING, 1; } - * :=> text - - newline => line { x->line = ++s->line; goto scan; } - ws+ { goto scan; } - @s0 glyph+ @s1 - { x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; } - - bible_ref = natural ":" natural [ab]? ("-" (natural ":")? natural [ab]?)?; - glyph_minus = glyph \ [']; - - @s0 ("Genesis" | "Exodus" | "Leviticus" | "Numbers" | "Deuteronomy" - | "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings" - | "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job" - | "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah" - | "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel" - | "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk" - | "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark" - | "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians" - | "Galatians" | "Ephesians" | "Philippians" | "Colossians" - | "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon" - | "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude" - | "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``" - => bible { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; } - @s0 bible_ref @s1 ws+ "--" ws+ "``" - { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_CHAPTER_VERSE, 1; } - "``" { return x->symbol = KJV_NEXT, 1; } - "''" :=> text - /* fixme: This is a hack that doesn't allow apostrophes at the end of a - word, (not sure there are any in the bible.) Is ' terminated by ''; - otherwise same as glyph+ above. */ - @s0 ("'"? glyph_minus+ ("'" glyph_minus+)*) @s1 - { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_TEXT, 1; } - /* Multiple verses can be present, but they end in ''. - Not strictly enforced. 
*/ - newline / (newline | "``") { x->line = ++s->line; goto scan; } - newline { return x->symbol = SYNTAX, 0; } - - @s0 keyword @s1 => expect_line - { x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; } - - "" / "(" :=> map - "[" ws* @s0 keyword @s1 ws* "]" - { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; } - @s0 keyword @s1 => expect_line - { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; } - - "(" @s0 decimal "," @s1 decimal ")" => expect_caption - { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; } - - "source" :=> source - "ed" :=> ed - "contact" :=> contact - "glider" :=> glider - "flight" :=> flight - "bible" :=> bible - "book" :=> book - "movie" :=> movie - "tv" :=> tv - "medication" :=> medication - "idea" :=> idea - "vaccine" :=> vaccine - "in" :=> in - "" / natural :=> significant - [0-1][0-9] "-" [0-3][0-9] - ", " [0-2][0-9] ":" [0-5][0-9] "] " - :=> text /* This is likely WhatsApp conversations. Ignore. */ - /* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */ - - /* How did it get into my journal? */ - "source" - { if(s->is_ws_expected || s->edict.size) - return x->symbol = SYNTAX, 0; - s->is_ws_expected = 1, s->is_source = 1; - s->edict.size = 2; - s->edict.expect[1] = EXPECT_KEYWORD; - s->edict.expect[0] = EXPECT_END_TEXT; - return x->symbol = SOURCE, 1; } - "default" - { if(s->is_ws_expected || !s->is_source) - return x->symbol = SYNTAX, 0; - s->is_ws_expected = 1, s->is_source = 0; - return x->symbol = DEFAULT, 1; } - - /* Editorializing; looking back. */ - "ed" - { if(s->is_ws_expected || s->edict.size) - return x->symbol = SYNTAX, 0; - s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */ - s->edict.size = 1; - s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */ - return x->symbol = EDITORIALIZING, 1; } - - /* Score. 
*/ - "significant" - { if(s->is_ws_expected || s->edict.size) - return x->symbol = SYNTAX, 0; - s->is_ws_expected = 1; - s->edict.size = 3; - s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */ - s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */ - s->edict.expect[0] = EXPECT_DATE; /* Birthday. */ - return x->symbol = SIGNIFICANT, 1; } - @s0 natural @s1 - { if(s->is_ws_expected || s->edict.size) - return x->symbol = SYNTAX, 0; - s->is_ws_expected = 1; - x->s0 = s0, x->s1 = s1; - return x->symbol = SIGNIFICANT_RECALL, 1; } - - /* General [edict: whatever]. */ - ws+ { s->is_ws_expected = 0; goto scan; } - ":" - { if(!s->edict.size) return x->symbol = SYNTAX, 0; - s->is_ws_expected = 0, s->is_source = 0; - expect_pop(); goto scan; } - ws* @s0 keyword @s1 ws* ";"? - { x->s0 = s0, x->s1 = s1; expect_pop(); - return x->symbol = ARG_KEYWORD, 1; } - ws* @s0 date @s1 ws* ";"? - { x->s0 = s0, x->s1 = s1; expect_pop(); - return x->symbol = ARG_DATE, 1; } - ws* @s0 natural @s1 ws* ";"? - { x->s0 = s0, x->s1 = s1; expect_pop(); - return x->symbol = ARG_NATURAL, 1; } - - ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"? - { x->s0 = s0, x->s1 = s1; expect_pop(); - return x->symbol = ARG_RESTRICT_TEXT, 1; } - - ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws* - { x->s0 = s0, x->s1 = s1; expect_pop(); - return x->symbol = ARG_END_TEXT, 1; } - "]" => expect_line - { if(s->edict.size) return 0; goto scan; } - */ -} - -#endif diff --git a/src/scan_kjv.h b/src/scan_kjv.h new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/scan_kjv.h @@ -0,0 +1 @@ + diff --git a/src/scan_kjv.re.c b/src/scan_kjv.re.c new file mode 100644 index 0000000..8eb53d3 --- /dev/null +++ b/src/scan_kjv.re.c @@ -0,0 +1,100 @@ +/** @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). + + Scan journal entries for kjv references. 
*/
+
+#include "../src/journal.h"
+#include "../src/kjv.h"
+#include "../src/helper.h"
+#include
+#include
+#include
+#include
+/* NOTE(review): the four bare `#include` lines above name no header as shown
+ here. The code below uses `printf`/`fprintf`/`perror`, `EXIT_SUCCESS` and
+ `uint32_t`, so standard headers are needed; the names were presumably lost
+ in transcription -- confirm against the repository. */
+/*!conditions:re2c*/
+/** Scans `buffer` for chapter:verse citations and prints each one to `stdout`
+ as "Parsed ..." using the book most recently matched by a book rule. `date`
+ is only used to label the diagnostic on failure. The lexer starts in
+ condition `line` and the rules switch it between `line`, `book` and `skip`;
+ the `line` counter starts at 1 and is incremented by the rule that consumes
+ the rest of a line up to "\n". Debugging output ("yes!", "*! ...") also goes
+ to `stdout`.
+ NOTE(review): `yyfill` is disabled and the only successful exit is the
+ "\x00" rule, so `buffer` presumably has to be a null-terminated string --
+ confirm. As shown here the rules carry no condition lists even though three
+ conditions are referenced; that text may have been lost in transcription --
+ confirm against the repository.
+ @return 1 when the "\x00" rule matches; 0 after printing a
+ "... line ...: unexpected." message to `stderr`, which happens on the
+ default rule or when `helper_natural` returns zero. */
+static int scan(union date32 date, const char *const buffer) {
+	const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, *s0, *s1, *t0, *t1;
+	enum kjv_book book = Revelation; /* In effect until a book rule sets it. */
+	uint32_t chapter, verse;
+	enum YYCONDTYPE condition = yycline; /* Initial condition is `line`. */
+	size_t line = 1; /* Only read by the diagnostics. */
+	YYCURSOR = YYMARKER = yyt1 = buffer;
+	/*!re2c /**/
+	re2c:define:YYCTYPE = char;
+	re2c:yyfill:enable = 0;
+	re2c:define:YYGETCONDITION = "condition";
+	re2c:define:YYSETCONDITION = "condition = @@;";
+	re2c:define:YYGETCONDITION:naked = 1;
+	re2c:define:YYSETCONDITION:naked = 1;
+
+	unix_control = [\x01-\x08\x0a-\x1f\x7f];
+	ws = [ \t];
+	glyph = [^] \ ("\x00" | "\n" | unix_control | ws);
+	natural = [1-9][0-9]*;
+	lookat = ws* natural ":" natural [ab]?
+		("-" (natural ":")? natural [ab]?)? ws* "--";
+	*/
+	for( ; ; ) { /*!re2c /**/
+		* { printf("*! %zu\n", line);goto error; }
+		"\x00" { printf("yes!\n");return 1; }
+		* :=> skip
+		"Genesis" / lookat => book { book = Genesis; continue; }
+		"Exodus" / lookat => book { book = Exodus; continue; }
+		/*| "Leviticus" | "Numbers" | "Deuteronomy"
+		| "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings"
+		| "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job"
+		| "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah"
+		| "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel"
+		| "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk"
+		| "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark"
+		| "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians"
+		| "Galatians" | "Ephesians" | "Philippians" | "Colossians"
+		| "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon"
+		| "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude"
+		| "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``" */
+		// [^\n\x00]* newline { printf("throw\n"); line++; continue; }
+		ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? => skip {
+			if(!helper_natural(s0, s1, &chapter)
+				|| !helper_natural(t0, t1, &verse)) goto error;
+			union kjvcite c
+				= { .book = book, .chapter = chapter, .verse = verse };
+			char a[12];
+			kjvcite_to_string(c, &a), printf("Parsed %s\n", a);
+			continue;
+		}
+		[^\n\x00]* "\n" => line { line++; continue; }
+		//=> bible { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; }
+	*/ }
+error:
+	{
+		char a[12];
+		date32_to_string(date, &a);
+		fprintf(stderr, "%s line %zu: unexpected.\n", a, line);
+	}
+	return 0;
+}
+/** Driver: checks the journal returned by `journal()`, prints its summary,
+ runs `scan` once on a hard-coded sample, then prints the date and text of
+ every entry yielded by `journal_next`.
+ @return `EXIT_SUCCESS`, or `EXIT_FAILURE` (after `perror`) when
+ `journal_is_valid` is false. A failing `scan` only prints "Error :[" to
+ `stderr`; it does not change the exit status. */
+int main(void) {
+	struct journal j = journal();
+	int success = EXIT_SUCCESS;
+	struct journal_iterator it;
+	union date32 k;
+	union load *v;
+	if(!journal_is_valid(&j)) goto catch;
+	printf("Journal: %s.\n", journal_to_string(&j));
+	if(!scan((union date32){ .u32 = 42 }, "Genesis 1:1 -- fdgYo.\n"
+		"Exodus 1:2 -- fuck\n"
+		"no\n")) fprintf(stderr, "Error :[\n");
+	it = journal_begin(&j); while(journal_next(&it, &k, &v)) {
+		char a[12];
+		date32_to_string(k, &a), printf("%s: %s\n", a, v->text);
+	}
+	goto finally;
+catch:
+	success = EXIT_FAILURE;
+	perror("journal");
+finally:
+	journal_(&j); /* Reached on both paths. NOTE(review): presumably releases the journal -- confirm. */
+	return success;
+}