From ccdee8a779835b316bcdc0aa127e2ce9eb649382 Mon Sep 17 00:00:00 2001 From: Neil Date: Thu, 29 Dec 2022 21:32:40 -0800 Subject: [PATCH] Scan all kjv worked; had files that were not delimited. --- src/scan_kjv.re.c | 67 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/scan_kjv.re.c b/src/scan_kjv.re.c index b4b52b7..ffb6b26 100644 --- a/src/scan_kjv.re.c +++ b/src/scan_kjv.re.c @@ -16,16 +16,15 @@ static int scan(union date32 date, const char *const buffer, struct kjv *const kj) { - const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, - *s0, *s1, *t0, *t1, *line_start, *line_end; + const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *yyt3, *s0, *s1, *t0, *t1; enum kjv_book book = Revelation; uint32_t chapter = 0, verse = 0, verse_end = 0; enum YYCONDTYPE condition = yycline; size_t line = 1; char datestr[12] = {0}; - int is_found = 0; + const char *why = "unexpected"; assert(buffer && kj); - YYCURSOR = YYMARKER = yyt1 = line_start = buffer; + YYCURSOR = YYMARKER = yyt1 = buffer; /*!re2c /**/ re2c:define:YYCTYPE = char; re2c:yyfill:enable = 0; @@ -39,17 +38,19 @@ static int scan(union date32 date, const char *const buffer, glyph = [^] \ ("\x00" | "\n" | unix_control | ws); natural = [1-9][0-9]*; engage = ws+ "--" ws+; - lookat = ws* natural ":" natural [ab]? - ("-" (natural ":")? natural [ab]?)? engage; + /* (natural ":")? Don't use for memorizing and use for reading, I think? */ + /*("``"|"\"") This is not in the next book. */ + lookat = ws* natural ":" natural [ab]? ("-" natural [ab]?)? engage; first = ("I" | "1") " "?; second = ("II" | "2") " "?; third = ("III" | "3") " "?; */ for( ; ; ) { /*!re2c /**/ - * { goto catch; } + [^\n\x00] { continue; } + "\x00" { why = "no newline at end of file"; goto catch; } + "\n" => line { line++; continue; } "\x00" { return 1; } - "\n" @line_start - { fprintf(stderr, "%4zu\n", line); line++; continue; } + "\n" { line++; continue; } * :=> skip /* Guess it can't be simplified? */ "Genesis" / lookat => book { book = Genesis; continue; } "Exodus" / lookat => book { book = Exodus; continue; } @@ -126,28 +127,35 @@ static int scan(union date32 date, const char *const buffer, third "John" / lookat => book { book = IIIJohn; continue; } "Jude" / lookat => book { book = Jude; continue; } "Revelation" / lookat => book { book = Revelation; continue; } + * { why = "default unrecognized"; goto catch; } + /* 19:15a, just ignore the a. */ ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? { - if(chapter || verse || verse_end - || !helper_natural(s0, s1, &chapter) - || !helper_natural(t0, t1, &verse)) goto catch; + if(chapter || verse || verse_end) + { why = "reference unrecognized"; goto catch; } + if(!helper_natural(s0, s1, &chapter) + || !helper_natural(t0, t1, &verse)) + { why = "reference numerical error"; goto catch; } continue; } "-" @s0 natural @s1 [ab]? { /* Verse range. */ - if(!chapter || !verse || verse_end - || !helper_natural(s0, s1, &verse_end)) goto catch; + if(!chapter || !verse || verse_end) + { why = "range unrecognized"; goto catch; } + if(!helper_natural(s0, s1, &verse_end)) + { why = "range numerical error"; goto catch; } continue; } engage => skip { const size_t old_set_words = kj->set_words; char citestr[12]; - if(!chapter || !verse || verse_end && verse_end <= verse) - goto catch; + if(!chapter || !verse) { why = "missing information"; goto catch; } + if(verse_end && verse_end <= verse) + { why = "interval error"; goto catch; } union kjvcite cite = { .book = book, .chapter = chapter, .verse = verse }; if(!datestr[0]) date32_to_string(date, &datestr); /* Only once. */ kjvcite_to_string(cite, &citestr); for( ; ; verse++, cite.verse++) { - if(!kjv_add(kj, cite)) goto catch; + if(!kjv_add(kj, cite)) { why = "add to set"; goto catch; } if(!verse_end || verse_end <= verse) break; } if(verse_end) { @@ -158,26 +166,15 @@ static int scan(union date32 date, const char *const buffer, datestr, old_set_words, kj->set_words, citestr); } book = Revelation, chapter = 0, verse = 0, verse_end = 0; - is_found = 1; - continue; - } - [^\n\x00]* @line_end "\n" @s0 => line { - const size_t size = (size_t)(line_end - line_start); - int intsize = size > 40 ? 40 : (int)size; - fprintf(stderr, "%s%4zu: %.*s%s\n", is_found ? "\033[1;35m" : "", - line, intsize, line_start, is_found ? "\033[0m" : ""); - line_start = s0, is_found = 0; - line++; continue; } */ } + assert(0); /* Never gets here. */ catch: if(!errno) errno = EILSEQ; - { - char a[12]; - date32_to_string(date, &a); - fprintf(stderr, "%s line %zu: unexpected.\n", a, line); - } + date32_to_string(date, &datestr); + fprintf(stderr, "%s\n" + "%s line %zu: %s.\n", buffer, datestr, line, why); return 0; } @@ -192,6 +189,8 @@ int main(void) { /*scan((union date32){.year=2000, .month=1, .day=1}, "\n\n\n" "Romans 3:23 -- ``For all have sinned, " "and come short of the glory of God.''\n", &kj);*/ + /* FIXME: have a column with the number so we can see how much uniqueness + makes a difference. */ j = journal(); if(!journal_is_valid(&j)) goto catch; fprintf(stderr, "Journal: %s.\n", journal_to_string(&j)); @@ -201,14 +200,14 @@ int main(void) { "# date\told\tnew / %zu\n", kj.total_words); it = journal_begin(&j), i = 0; while(journal_next(&it, &k, &v)) { if(!scan(k, v->text, &kj)) goto catch; - if(++i > 32) break; + /*if(++i > 32) break;*/ } printf("EOD\n" "set monochrome\n" "set xdata time\n" "set timefmt \"%%Y-%%m-%%d\"\n" "set xtics format \"%%Y-%%m-%%d\" rotate by -30\n" - "set ylabel \"KJV memorized\"\n" + "set ylabel \"words\"\n" "set format y \"%%g%%%%\"\n" "unset key #set key bottom right\n" "set grid\n"