interpret/src/scan.re.c

229 lines
7.6 KiB
C
Raw Normal View History

2022-02-13 01:35:02 -05:00
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Lexer for journal entries.
"^[edict: expect; there; to; be; args] Something.\n"
"^[<num>] Something.\n"
"<num>+.<num>{2}(<num>)$"
"<bookref> -- \"*\n"
@std C89/90 */
2022-12-28 17:04:49 -05:00
#include "../src/scan.h"
2022-02-13 01:35:02 -05:00
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <errno.h>
/* This defines `enum condition`. */
/*!types:re2c*/
2022-12-28 17:04:49 -05:00
/** Entry point for lexing `buffer`. At present this is a stub: the re2c block
 below carries configuration only and defines no rules, so nothing is consumed.
 @param[date] Not used yet.
 @param[buffer] Start of the text; the re2c cursor is initialised to it.
 Input refill is disabled (`yyfill:enable = 0`), so once rules are added the
 text will presumably need a terminating sentinel -- TODO confirm.
 @return 1. NOTE(review): the original reached the closing brace of a
 non-`void` function without returning a value, which is undefined behaviour
 as soon as a caller inspects the result. The value 1 is a placeholder;
 replace it with the real status when rules are written. */
int scan(union date32 date, const char *const buffer) {
	const char *YYCURSOR = buffer;
	/*!re2c /**/
	re2c:define:YYCTYPE = char;
	re2c:yyfill:enable = 0;
	*/
	/* Nothing reads these until rules exist; keep the compiler quiet. */
	(void)date, (void)YYCURSOR;
	return 1;
}
2022-02-13 01:35:02 -05:00
2022-12-28 17:04:49 -05:00
#if 0
struct scan scan(const char *const buffer) {
struct scan scan;
scan.marker = scan.from = scan.cursor = scan.label = scan.buffer = buffer;
scan.condition = yycline;
scan.line = 1;
return scan;
2022-02-15 01:02:32 -05:00
}
2022-12-28 17:04:49 -05:00
int scan_next(struct scan *const s, struct lex *const x) {
2022-07-06 17:59:56 -04:00
/*!re2c /**/
re2c:flags:tags = 1;
2022-12-28 17:04:49 -05:00
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCONDTYPE = 'condition';
2022-12-28 17:04:49 -05:00
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION:naked = 1;
2022-12-28 17:04:49 -05:00
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
2022-02-15 22:48:50 -05:00
sentinel = "\x00";
newline = "\n";
2022-12-13 00:25:28 -05:00
unix_control = [\x01-\x08\x0a-\x1f\x7f];
2022-02-15 22:48:50 -05:00
ws = [ \t];
2022-12-13 00:25:28 -05:00
glyph = [^] \ (sentinel | unix_control | newline | ws);
keyword = ([a-zA-Z] | [0-9][0-9_\-]*[a-zA-Z]) [a-zA-Z0-9_\-]*;
2022-02-15 22:48:50 -05:00
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
natural = [1-9][0-9]*;
date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9];
*/
2022-02-13 01:35:02 -05:00
const char *s0, *s1;
/*!stags:re2c format = 'const char *@@;\n'; */
2022-12-28 17:04:49 -05:00
assert(s && x);
if(!s->buffer) return 0;
x->line = s->line;
2022-02-13 23:45:38 -05:00
x->s0 = x->s1 = 0;
2022-02-13 01:35:02 -05:00
scan:
2022-07-06 17:59:56 -04:00
/*!re2c /**/
2022-12-13 00:25:28 -05:00
<*> unix_control { return x->symbol = ILLEGAL, 0; }
2022-02-15 22:48:50 -05:00
<*> * { return x->symbol = SYNTAX, 0; }
2022-12-13 00:25:28 -05:00
<*> sentinel /* New line always delimits. */
2022-12-28 17:04:49 -05:00
{ return x->symbol = s->condition == yycline ? END : ILLEGAL, 0; }
<expect_line> newline => line { x->line = ++s->line; goto scan; }
2022-12-13 00:25:28 -05:00
/* Symbols that go at the beginning of a line. */
2022-12-28 17:04:49 -05:00
<line> newline { x->line = ++s->line; goto scan; }
2022-07-18 01:01:34 -04:00
<line> "[" :=> edict
2022-02-15 22:48:50 -05:00
<line> "--" :=> source
<line> "->" :=> location
2022-07-18 01:01:34 -04:00
<line> "!" => text { return x->symbol = COMPLETE, 1; }
<line> "^" => text { return x->symbol = CANCELLED, 1; }
<line> "#" => text { return x->symbol = HEADING, 1; }
2022-12-28 17:04:49 -05:00
<line> * :=> text
2022-12-28 17:04:49 -05:00
<text> newline => line { x->line = ++s->line; goto scan; }
2022-07-05 22:21:56 -04:00
<text, bible> ws+ { goto scan; }
2022-02-15 22:48:50 -05:00
<text> @s0 glyph+ @s1
{ x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; }
2022-07-06 17:59:56 -04:00
bible_ref = natural ":" natural [ab]? ("-" (natural ":")? natural [ab]?)?;
glyph_minus = glyph \ ['];
2022-07-05 22:21:56 -04:00
<text> @s0 ("Genesis" | "Exodus" | "Leviticus" | "Numbers" | "Deuteronomy"
| "Joshua" | "Judges" | "Ruth" | "I"{1,2} " Samuel" | "I"{1,2} " Kings"
| "I"{1,2} " Chronicles" | "Ezra" | "Nehemiah" | "Esther" | "Job"
2022-07-05 22:21:56 -04:00
| "Psalms" | "Proverbs" | "Ecclesiastes" | "Song of Solomon" | "Isaiah"
| "Jeremiah" | "Lamentations" | "Ezekiel" | "Daniel" | "Hosea" | "Joel"
| "Amos" | "Obadiah" | "Jonah" | "Micah" | "Nahum" | "Habakkuk"
| "Zephaniah" | "Haggai" | "Zechariah" | "Malachi" | "Matthew" | "Mark"
| "Luke" | "John" | "Acts" | "Romans" | "I"{1,2} " Corinthians"
2022-07-05 22:21:56 -04:00
| "Galatians" | "Ephesians" | "Philippians" | "Colossians"
| "I"{1,2} " Thessalonians" | "I"{1,2} " Timothy" | "Titus" | "Philemon"
| "Hebrews" | "James" | "I"{1,2} " Peter" | "I"{1,3} " John" | "Jude"
2022-07-06 17:59:56 -04:00
| "Revelation") @s1 ws* / bible_ref ws+ "--" ws+ "``"
2022-07-24 20:38:56 -04:00
=> bible { x->s0 = s0, x->s1 = s1; return x->symbol = KJV_BOOK, 1; }
2022-07-06 17:59:56 -04:00
<bible> @s0 bible_ref @s1 ws+ "--" ws+ "``"
2022-07-24 20:38:56 -04:00
{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_CHAPTER_VERSE, 1; }
<bible> "``" { return x->symbol = KJV_NEXT, 1; }
<bible> "''" :=> text
/* fixme: This is a hack that doesn't allow apostrophes at the end of a
word, (not sure there are any in the bible.) Is ' terminated by '';
otherwise same as glyph+ above. */
2022-07-18 01:01:34 -04:00
<bible> @s0 ("'"? glyph_minus+ ("'" glyph_minus+)*) @s1
2022-07-24 20:38:56 -04:00
{ x->s0 = s0, x->s1 = s1; return x->symbol = KJV_TEXT, 1; }
/* Multiple verses can be present, but they end in ''.
Not strictly enforced. */
2022-12-28 17:04:49 -05:00
<bible> newline / (newline | "``") { x->line = ++s->line; goto scan; }
<bible> newline { return x->symbol = SYNTAX, 0; }
2022-07-05 22:21:56 -04:00
<source> @s0 keyword @s1 => expect_line
2022-02-15 22:48:50 -05:00
{ x->s0 = s0, x->s1 = s1; return x->symbol = SOURCE_RECALL, 1; }
2022-02-14 00:07:51 -05:00
2022-02-15 23:52:02 -05:00
<location> "" / "(" :=> map
<location> "[" ws* @s0 keyword @s1 ws* "]"
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_SAVE, 1; }
2022-02-15 22:48:50 -05:00
<location> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; }
2022-02-14 00:07:51 -05:00
2022-02-15 23:52:02 -05:00
<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
2022-02-15 22:48:50 -05:00
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
2022-02-15 17:03:02 -05:00
2022-12-13 00:25:28 -05:00
<edict> "source" :=> source
<edict> "ed" :=> ed
<edict> "contact" :=> contact
<edict> "glider" :=> glider
<edict> "flight" :=> flight
<edict> "bible" :=> bible
<edict> "book" :=> book
<edict> "movie" :=> movie
<edict> "tv" :=> tv
<edict> "medication" :=> medication
<edict> "idea" :=> idea
<edict> "vaccine" :=> vaccine
<edict> "in" :=> in
<edict> "" / natural :=> significant
<edict> [0-1][0-9] "-" [0-3][0-9]
", " [0-2][0-9] ":" [0-5][0-9] "] "
:=> text /* This is likely WhatsApp conversations. Ignore. */
/* missed, show, 'First, Second', 'Sounds', 'CSS', ..., 'Swanky', 'Shields' */
2022-07-17 01:24:33 -04:00
/* How did it get into my journal? */
<edict> "source"
2022-12-28 17:04:49 -05:00
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
2022-12-28 17:04:49 -05:00
s->is_ws_expected = 1, s->is_source = 1;
s->edict.size = 2;
s->edict.expect[1] = EXPECT_KEYWORD;
s->edict.expect[0] = EXPECT_END_TEXT;
2022-02-15 01:02:32 -05:00
return x->symbol = SOURCE, 1; }
<edict> "default"
2022-12-28 17:04:49 -05:00
{ if(s->is_ws_expected || !s->is_source)
return x->symbol = SYNTAX, 0;
2022-12-28 17:04:49 -05:00
s->is_ws_expected = 1, s->is_source = 0;
2022-02-15 01:02:32 -05:00
return x->symbol = DEFAULT, 1; }
2022-07-17 01:24:33 -04:00
/* Editorializing; looking back. */
2022-07-06 13:02:28 -04:00
<edict> "ed"
2022-12-28 17:04:49 -05:00
{ if(s->is_ws_expected || s->edict.size)
2022-07-06 13:02:28 -04:00
return x->symbol = SYNTAX, 0;
2022-12-28 17:04:49 -05:00
s->is_ws_expected = 1; /* no idea, just copy; probably should do sth */
s->edict.size = 1;
s->edict.expect[0] = EXPECT_END_TEXT; /* Pithy comment. */
2022-07-06 13:02:28 -04:00
return x->symbol = EDITORIALIZING, 1; }
2022-07-17 01:24:33 -04:00
/* Score. */
<edict> "significant"
2022-12-28 17:04:49 -05:00
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
2022-12-28 17:04:49 -05:00
s->is_ws_expected = 1;
s->edict.size = 3;
s->edict.expect[2] = EXPECT_NATURAL; /* Ordinal. */
s->edict.expect[1] = EXPECT_RESTRICT_TEXT; /* Name. */
s->edict.expect[0] = EXPECT_DATE; /* Birthday. */
2022-02-15 01:02:32 -05:00
return x->symbol = SIGNIFICANT, 1; }
<edict> @s0 natural @s1
2022-12-28 17:04:49 -05:00
{ if(s->is_ws_expected || s->edict.size)
return x->symbol = SYNTAX, 0;
2022-12-28 17:04:49 -05:00
s->is_ws_expected = 1;
2022-02-15 01:02:32 -05:00
x->s0 = s0, x->s1 = s1;
2022-02-15 22:48:50 -05:00
return x->symbol = SIGNIFICANT_RECALL, 1; }
2022-02-15 01:02:32 -05:00
2022-07-17 01:24:33 -04:00
/* General [edict: whatever]. */
2022-12-28 17:04:49 -05:00
<edict> ws+ { s->is_ws_expected = 0; goto scan; }
<edict> ":"
2022-12-28 17:04:49 -05:00
{ if(!s->edict.size) return x->symbol = SYNTAX, 0;
s->is_ws_expected = 0, s->is_source = 0;
2022-02-15 01:02:32 -05:00
expect_pop(); goto scan; }
2022-02-15 22:48:50 -05:00
<edict_keyword> ws* @s0 keyword @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
2022-02-15 01:02:32 -05:00
return x->symbol = ARG_KEYWORD, 1; }
2022-02-15 22:48:50 -05:00
<edict_date> ws* @s0 date @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
2022-02-15 01:02:32 -05:00
return x->symbol = ARG_DATE, 1; }
2022-02-15 22:48:50 -05:00
<edict_natural> ws* @s0 natural @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_NATURAL, 1; }
2022-07-17 01:24:33 -04:00
<edict_restrict_text>
2022-02-16 00:28:35 -05:00
ws* @s0 (glyph \ [;[\]]) ((glyph \ [;[\]]) | ws)* @s1 ws* ";"?
2022-02-15 22:48:50 -05:00
{ x->s0 = s0, x->s1 = s1; expect_pop();
2022-07-17 01:24:33 -04:00
return x->symbol = ARG_RESTRICT_TEXT, 1; }
<edict_end_text>
ws* @s0 (glyph \ [[\]]) ((glyph \ [[\]]) | ws)* @s1 ws*
{ x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_END_TEXT, 1; }
2022-02-16 00:21:53 -05:00
<edict, edict_end> "]" => expect_line
2022-12-28 17:04:49 -05:00
{ if(s->edict.size) return 0; goto scan; }
2022-02-15 01:02:32 -05:00
*/
2022-02-13 01:35:02 -05:00
}
2022-12-28 17:04:49 -05:00
#endif