/** @license 2022 Neil Edelman, distributed under the terms of the
 [MIT License](https://opensource.org/licenses/MIT).

 Lexer for journal entries. This is `re2c` input: the specially marked
 comments are consumed by `re2c` and replaced by generated C.

 @std C89/90 */

#include "../src/lex.h"
/* NOTE(review): the reviewed text had five `#include` directives with the
 header names missing. These are the standard headers this file visibly uses;
 restore any further ones from the original. */
#include <assert.h> /* assert */
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
#include <stdio.h>  /* printf */

/*!re2c
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
*/

/** Tests whether the whole of `a` is a year: an optional minus sign followed
 by a decimal integer without leading zeros, or a lone `0`.
 @param a Null-terminated string; must not be null.
 @param year Receives the signed value on success; must not be null.
 @return 1 on success; 0 if `a` does not match or the magnitude would not fit
 in an `int`, in which case `year` is not written. */
int lex_looks_like_year(const char *const a, int *const year) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a && year);
	/*!re2c
	@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
		int sign = 1, mag;
		if(*s0 == '-') { sign = -1; s0++; }
		for(mag = 0; *s0 != '\0'; s0++) {
			int d = *s0 - '0';
			if((INT_MAX - d) / 10 < mag) return 0;
			mag = mag * 10 + d;
		}
		*year = sign * mag;
		return 1;
	}
	* { return 0; }
	*/
}

/** Tests whether the whole of `a` is a two-digit month.
 @param a Null-terminated string; must not be null.
 @return The month, in `[1, 12]`, or 0 if `a` is not two digits in that
 range. */
int lex_looks_like_month(const char *const a) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a);
	/*!re2c
	@s0 [0-1][0-9] "\x00" {
		int val = 10 * (s0[0] - '0') + (s0[1] - '0');
		return val < 1 || val > 12 ? 0 : val;
	}
	* { return 0; }
	*/
}

/** Tests whether the whole of `a` is a two-digit day followed by `.txt`.
 @param a Null-terminated string; must not be null.
 @return The day, in `[1, 31]`, or 0 if `a` does not have that form. */
int lex_looks_like_day(const char *const a) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a);
	/*!re2c
	@s0 [0-3][0-9] ".txt\x00" {
		int val = 10 * (s0[0] - '0') + (s0[1] - '0');
		return val < 1 || val > 31 ? 0 : val;
	}
	* { return 0; }
	*/
}

/* This defines `enum condition`. */
/*!types:re2c*/

/* Kinds of argument a command can be waiting for; expanded with `X`. */
#define EXPECT X(KEYWORD), X(DATE), X(FREEFORM)

/** scanner reads a file and extracts semantic information. Valid to access
 only while underlying pointers do not change. */
static struct scan {
	/* `re2c` variables; these point directly into `buffer`. */
	const char *marker, *ctx_marker, *from, *cursor;
	/* Weird `re2c` stuff: these fields have to come after when >5? */
	const char *label, *buffer;
	enum condition condition;
	size_t line;
	int is_ws_expected, is_source;
#define X(n) EXPECT_ ## n
	/* Stack of argument kinds the current command still expects; `expect_pop`
	 takes them from the top, `expect[size - 1]`.
	 "[something: expect; there; to; be; args]", in this case, `size = 5` and
	 expect would be 5 `EXPECT_KEYWORD`. */
	struct { unsigned size; enum { EXPECT } expect[20]; } command;
#undef X
} scan;

/** Points the scanner at `buffer` and restores the initial lexing state: all
 cursors at the start, line 1, and no command in progress. Clearing the command
 state as well keeps a previous buffer that ended in the middle of a command
 from affecting this one.
 @param buffer Null-terminated text to lex. `lex_next` returns 0 immediately
 while this is null. */
void lex_reset(const char *const buffer) {
	scan.marker = scan.ctx_marker = scan.from = scan.cursor = scan.label
		= scan.buffer = buffer;
	/* Presumably the first generated condition is the starting one; confirm
	 against the generated `enum condition`. */
	scan.condition = 0;
	scan.line = 1;
	scan.is_ws_expected = scan.is_source = 0;
	scan.command.size = 0;
}

/*!re2c
re2c:flags:tags = 1;
re2c:define:YYCURSOR = scan.cursor;
re2c:define:YYMARKER = scan.marker;
re2c:define:YYCTXMARKER = scan.ctx_marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 'scan.condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;

// Eof is marked by null when preparing files for lexing.
// Mutually exclusive; only !, [, are not covered.
end = "\x00";
newline = "\n" | "\r" "\n"?;
ws = [ \t\v\f];
glyph = [^ \t\n\r\v\f![\x00];
glyphs = glyph+;

// inside the block
natural = [1-9][0-9]*;
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
// The day's first digit goes up to 3, matching `lex_looks_like_day`.
date = "-"? natural "-" [0-1][0-9] "-" [0-3][0-9];
*/

/** Pops the next expected argument kind off `scan.command` and switches the
 lexer condition to the one that reads it. With nothing left to pop, switches
 to the command-end condition. Prints a debugging trace to `stdout`. */
static void expect_pop(void) {
	if(!scan.command.size) {
		printf("nostack\n");
		scan.condition = yyccommand_end;
		return;
	}
	switch(scan.command.expect[--scan.command.size]) {
	case EXPECT_KEYWORD:
		printf("keyword\n");
		scan.condition = yyccommand_keyword;
		break;
	case EXPECT_DATE:
		printf("date\n");
		scan.condition = yyccommand_date;
		break;
	case EXPECT_FREEFORM:
		printf("freeform\n");
		scan.condition = yyccommand_freeform;
		break;
	}
}

/** Lexes the next token from the buffer given to `lex_reset`.

 On entry, `x` is cleared: `s0` and `s1` are null, `line` is the current line,
 and `ws_before` and `new_paragraph` are 0. Tokens that carry text set
 `x->s0` and `x->s1` to its start and one-past-end within the buffer.

 NOTE(review): in the reviewed text the rules below carry no condition
 prefixes, although they use condition transitions and `expect_pop` assigns
 `yyc...` condition constants; confirm the prefixes against the original before
 regenerating.

 NOTE(review): `new_paragraph` is set while at most two lines have been
 crossed since entry (`prev_line + 2 >= scan.line`); confirm that is the
 intended sense.

 @param x Receives the token; must not be null.
 @return 1 if a token was read. 0 if no buffer is set (`x` untouched), at the
 end of the buffer (`x->symbol` is `END`), or on a lexing error (`x->symbol` is
 `ERROR`). */
int lex_next(struct lex *const x) {
	const char *s0, *s1;
	const size_t prev_line = scan.line;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(x);
	if(!scan.buffer) return 0;
	x->s0 = x->s1 = 0;
	x->line = prev_line;
	x->ws_before = 0;
	x->new_paragraph = 0;
scan:
	/*!re2c
	end { printf("end\n");return x->symbol = END, 0; }
	end { return x->symbol = ERROR, 0; }
	newline {
		x->line = ++scan.line;
		x->new_paragraph = prev_line + 2 >= scan.line;
		x->ws_before = 1;
		goto scan;
	}
	ws+ { x->ws_before = 1; goto scan; }
	@s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1; return x->symbol = TEXT, 1; }
	@s0 "!" @s1 { x->s0 = s0, x->s1 = s1; return x->symbol = BANG, 1; }
	"\\" @s0 "[" @s1
		{ x->s0 = s0, x->s1 = s1; return x->symbol = BRACKET, 1; }
	"![" :=> image
	"[" :=> command

	ws* "osm" ws* "](geo:" @s0 decimal "," @s1 decimal ")" => text {
		x->symbol = MAP, x->s0 = s0, x->s1 = s1;
		printf("Got a map.\n");
		return 1;
	}
	* { printf("image(broken)\n");return x->symbol = ERROR, 0; }

	// source
	"source" {
		if(scan.is_ws_expected || scan.command.size)
			return x->symbol = ERROR, 0;
		scan.is_ws_expected = 1, scan.is_source = 1;
		scan.command.size = 2;
		scan.command.expect[1] = EXPECT_KEYWORD;
		scan.command.expect[0] = EXPECT_FREEFORM;
		return x->symbol = SOURCE, 1;
	}
	"default" {
		if(scan.is_ws_expected || !scan.is_source)
			return x->symbol = ERROR, 0;
		scan.is_ws_expected = 1, scan.is_source = 0;
		return x->symbol = DEFAULT, 1;
	}

	// score
	"significant" {
		if(scan.is_ws_expected || scan.command.size)
			return x->symbol = ERROR, 0;
		scan.is_ws_expected = 1;
		scan.command.size = 2;
		scan.command.expect[1] = EXPECT_FREEFORM;
		scan.command.expect[0] = EXPECT_DATE;
		return x->symbol = SIGNIFICANT, 1;
	}
	@s0 natural @s1 {
		if(scan.is_ws_expected || scan.command.size)
			return x->symbol = ERROR, 0;
		scan.is_ws_expected = 1;
		x->s0 = s0, x->s1 = s1;
		return x->symbol = SCORE, 1;
	}

	// general command stuff
	ws+ { scan.is_ws_expected = 0; goto scan; }
	":" {
		if(!scan.command.size) return x->symbol = ERROR, 0;
		scan.is_ws_expected = 0, scan.is_source = 0;
		expect_pop();
		goto scan;
	}
	ws* @s0 id @s1 ws* ";"? / "]"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_KEYWORD, 1;
	}
	ws* @s0 date @s1 ws* ";"? / "]"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_DATE, 1;
	}
	ws* @s0 [^\t\n\r\v\f;[\]\x00]* @s1 ws* ";"? / "]"? {
		x->s0 = s0, x->s1 = s1;
		expect_pop();
		return x->symbol = ARG_FREEFORM, 1;
	}
	"]" => text {
		printf("]\n");
		if(scan.command.size)
			{ printf("huh?\n"); return x->symbol = ERROR, 0; }
		goto scan;
	}
	* { return x->symbol = ERROR, 0; }
	*/
	/* Every rule returns or jumps, so this is never reached; the return keeps
	 the function well-defined when `assert` is compiled out. */
	assert(0);
	return x->symbol = ERROR, 0;
}