/** @license 2022 Neil Edelman, distributed under the terms of the
 [MIT License](https://opensource.org/licenses/MIT).

 Lexer for journal entries.

 "^[edict: expect; there; to; be; args] Something.\n"
 "^[] Something.\n"
 "+.{2}()$"
 " -- \"*\n"

 @std C89/90 */

#include "../src/scan.h"
/* NOTE(review): the five directives below name no header in this copy of the
 file; restore the intended operands (`assert` is used further down) before
 building. */
#include
#include
#include
#include
#include

/* This defines `enum condition`. */
/*!types:re2c*/

/** Unfinished entry point: points `YYCURSOR` at the start of `buffer` and
 configures re2c for `char` input with refilling disabled. No rules are defined
 in the block yet.
 NOTE(review): `date` is not used and control reaches the closing brace without
 a `return`; decide on the result before calling this. */
int scan(union date32 date, const char *const buffer) {
	const char *YYCURSOR = buffer;
	/*!re2c /**/
	re2c:define:YYCTYPE = char;
	re2c:yyfill:enable = 0;
	*/
}

/* Everything from here to the matching `#endif` is excluded from compilation. */
#if 0

/** Builds a scanner over `buffer`: all of `marker`, `from`, `cursor`, `label`
 and `buffer` start at `buffer`, the condition starts at `yycline`, and the line
 counter starts at 1.
 @return The initialised scanner, by value. */
struct scan scan(const char *const buffer) {
	struct scan scan;
	scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer;
	scan.condition = yycline;
	scan.line = 1;
	return scan;
}

/** Lexes the next token of `s` into `x`. On entry `x->line` is set to the
 current line of `s` and `x->s0`, `x->s1` are cleared; rules that capture text
 set them to the tagged span.
 @return 1 if a token was produced and `x->symbol` names it. 0 if `s->buffer`
 is null, on the sentinel (`x->symbol` is `END` in the `yycline` condition,
 `ILLEGAL` otherwise), or on an `ILLEGAL` / `SYNTAX` error. */
int scan_next(struct scan *const s, struct lex *const x) {
	/* Configuration and named definitions shared by the rules below. */
	/*!re2c /**/
	re2c:flags:tags = 1;
	re2c:define:YYCTYPE = char;
	re2c:yyfill:enable = 0;
	re2c:define:YYCURSOR = s->cursor;
	re2c:define:YYMARKER = s->marker;
	re2c:define:YYCONDTYPE = 'condition';
	re2c:define:YYGETCONDITION = 's->condition';
	re2c:define:YYGETCONDITION:naked = 1;
	re2c:define:YYSETCONDITION = 's->condition = @@;';
	re2c:define:YYSETCONDITION:naked = 1;

	sentinel = "\x00";
	newline = "\n";
	unix_control = [\x01-\x08\x0a-\x1f\x7f];
	ws = [ \t];
	glyph = [^] \ (sentinel | unix_control | newline | ws);
	keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
	decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
	natural = [1-9][0-9]*;
	/* Year, two-digit month, two-digit day. The tens digit of the day goes up
	 to 3, as in the timestamp rule further down; limiting it to 0-1 rejected
	 every day from the 20th on. */
	date = "-"? natural "-" [0-1][0-9] "-" [0-3][0-9];
	*/
	const char *s0, *s1; /* Tags delimiting the captured text of a rule. */
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(s && x);
	if(!s->buffer) return 0;
	x->line = s->line;
	x->s0 = x->s1 = 0;
scan: /* Rules that produce no token jump back here to keep lexing. */
	/* NOTE(review): only the first three rules carry a condition prefix in this
	 copy, although many later rules use condition transitions; confirm the
	 prefixes against the original before re-enabling. Rule order is
	 significant and is kept exactly. */
	/*!re2c /**/
	<*> unix_control { return x->symbol = ILLEGAL, 0; }
	<*> * { return x->symbol = SYNTAX, 0; }
	<*> sentinel /* New line always delimits. */
		{ return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; }

	newline => line { x->line = ++s->line; goto scan; }

	/* Symbols that go at the beginning of a line. */
	newline { x->line = ++s->line; goto scan; }
	"[" :=> edict
	"--" :=> source
	"->" :=> location
	"!" => text { return x->symbol = COMPLETE, 1; }
	"^" => text { return x->symbol = CANCELLED, 1; }
	"#" => text { return x->symbol = HEADING, 1; }
	* :=> text

	newline => line { x->line = ++s->line; goto scan; }
	ws+ { goto scan; }
	@s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; }

	bible_ref = natural ":" natural [ab]? ("-" (natural ":")? natural [ab]?)?;
	glyph_minus = glyph \ ['];

	@s0 ("Genesis" | "Exodus" | "Leviticus" | "Numbers" | "Deuteronomy"
		| "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings"
		| "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job"
		| "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah"
		| "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel"
		| "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk"
		| "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark"
		| "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians"
		| "Galatians" | "Ephesians" | "Philippians" | "Colossians"
		| "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon"
		| "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude"
		| "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``" => bible
		{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; }
	@s0 bible_ref @s1 ws+ "--" ws+ "``"
		{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_CHAPTER_VERSE, 1; }
	"``" { return x->symbol = KJV_NEXT, 1; }
	"''" :=> text
	/* fixme: This is a hack that doesn't allow apostrophes at the end of a
	 word, (not sure there are any in the bible.) Is ' terminated by '';
	 otherwise same as glyph+ above. */
	@s0 ("'"? glyph_minus+ ("'" glyph_minus+)*) @s1
		{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_TEXT, 1; }
	/* Multiple verses can be present, but they end in ''. Not strictly
	 enforced. */
	newline / (newline | "``") { x->line = ++s->line; goto scan; }
	newline { return x->symbol = SYNTAX, 0; }

	@s0 keyword @s1 => expect_line
		{ x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; }

	"" / "(" :=> map
	"[" ws* @s0 keyword @s1 ws* "]"
		{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; }
	@s0 keyword @s1 => expect_line
		{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; }
	"(" @s0 decimal "," @s1 decimal ")" => expect_caption
		{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }

	"source" :=> source
	"ed" :=> ed
	"contact" :=> contact
	"glider" :=> glider
	"flight" :=> flight
	"bible" :=> bible
	"book" :=> book
	"movie" :=> movie
	"tv" :=> tv
	"medication" :=> medication
	"idea" :=> idea
	"vaccine" :=> vaccine
	"in" :=> in
	"" / natural :=> significant
	[0-1][0-9] "-" [0-3][0-9] ", " [0-2][0-9] ":" [0-5][0-9] "] " :=> text
	/* This is likely WhatsApp conversations. Ignore. */
	/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */

	/* How did it get into my journal? */
	"source" {
		if(s->is_ws_expected || s->edict.size)
			return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 1, s->is_source = 1;
		s->edict.size = 2;
		s->edict.expect[1] = EXPECT_KEYWORD;
		s->edict.expect[0] = EXPECT_END_TEXT;
		return x->symbol = SOURCE, 1;
	}
	"default" {
		if(s->is_ws_expected || !s->is_source)
			return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 1, s->is_source = 0;
		return x->symbol = DEFAULT, 1;
	}

	/* Editorializing; looking back. */
	"ed" {
		if(s->is_ws_expected || s->edict.size)
			return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */
		s->edict.size = 1;
		s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */
		return x->symbol = EDITORIALIZING, 1;
	}

	/* Score. */
	"significant" {
		if(s->is_ws_expected || s->edict.size)
			return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 1;
		s->edict.size = 3;
		s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */
		s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */
		s->edict.expect[0] = EXPECT_DATE; /* Birthday. */
		return x->symbol = SIGNIFICANT, 1;
	}
	@s0 natural @s1 {
		if(s->is_ws_expected || s->edict.size)
			return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 1;
		x->s0 = s0, x->s1 = s1;
		return x->symbol = SIGNIFICANT_RECALL, 1;
	}

	/* General [edict: whatever]. */
	ws+ { s->is_ws_expected = 0; goto scan; }
	":" {
		if(!s->edict.size) return x->symbol = SYNTAX, 0;
		s->is_ws_expected = 0, s->is_source = 0;
		expect_pop();
		goto scan;
	}
	ws* @s0 keyword @s1 ws* ";"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_KEYWORD, 1;
	}
	ws* @s0 date @s1 ws* ";"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_DATE, 1;
	}
	ws* @s0 natural @s1 ws* ";"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_NATURAL, 1;
	}
	ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_RESTRICT_TEXT, 1;
	}
	ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws* {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_END_TEXT, 1;
	}
	"]" => expect_line {
		/* Closing the edict while arguments are still expected is reported as
		 `SYNTAX`, like every other failing rule, instead of leaving
		 `x->symbol` at its previous value. */
		if(s->edict.size) return x->symbol = SYNTAX, 0;
		goto scan;
	}
	*/
}

#endif