224 lines
6.5 KiB
C
224 lines
6.5 KiB
C
/** @license 2022 Neil Edelman, distributed under the terms of the
 [MIT License](https://opensource.org/licenses/MIT).

 Lexer for journal entries.

 @std C89/90 */
|
|
|
|
#include "../src/lex.h"
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
#include <limits.h>
|
|
#include <errno.h>
|
|
|
|
/*!re2c
// Settings shared by every lexer in this file: input is never refilled, and
// the lexers read plain `char`.
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
*/
|
|
|
|
/** Decides whether the whole null-terminated string `a` is a year: either
 "0", or an optional minus sign followed by digits with no leading zero.
 @param[a] String to test; must not be null.
 @param[year] Receives the parsed value on success; must not be null. It is
 left untouched on failure.
 @return One if `a` is a year that fits in an `int`, otherwise zero. */
int lex_looks_like_year(const char *const a, int *const year) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a && year);
	/*!re2c
	@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
		const char *digit = s0;
		const int is_negative = *digit == '-';
		int magnitude = 0;
		if(is_negative) digit++;
		while(*digit != '\0') {
			const int d = *digit++ - '0';
			if(magnitude > (INT_MAX - d) / 10) return 0;
			magnitude = magnitude * 10 + d;
		}
		*year = is_negative ? -magnitude : magnitude;
		return 1;
	}
	* { return 0; }
	*/
}
|
|
|
|
/** Decides whether the whole null-terminated string `a` is a two-digit month.
 @param[a] String to test; must not be null.
 @return The month number, in [1, 12], or zero if `a` is not a month. */
int lex_looks_like_month(const char *const a) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a);
	/*!re2c
	@s0 [0-1][0-9] "\x00" {
		const int tens = s0[0] - '0', ones = s0[1] - '0';
		const int month = tens * 10 + ones;
		if(month >= 1 && month <= 12) return month;
		return 0;
	}
	* { return 0; }
	*/
}
|
|
|
|
/** Decides whether the whole null-terminated string `a` is a two-digit day
 followed by the extension ".txt".
 @param[a] String to test; must not be null.
 @return The day number, in [1, 31], or zero if `a` does not have that form. */
int lex_looks_like_day(const char *const a) {
	const char *YYCURSOR = a, *YYMARKER = a, *s0;
	/*!stags:re2c format = 'const char *@@;\n'; */
	assert(a);
	/*!re2c
	@s0 [0-3][0-9] ".txt\x00" {
		const int tens = s0[0] - '0', ones = s0[1] - '0';
		const int day = tens * 10 + ones;
		if(day >= 1 && day <= 31) return day;
		return 0;
	}
	* { return 0; }
	*/
}
|
|
|
|
/* This defines `enum condition`. */
|
|
/*!types:re2c*/
|
|
|
|
#define EXPECT X(KEYWORD), X(DATE), X(FREEFORM)
|
|
|
|
/** scanner reads a file and extracts semantic information. Valid to access
|
|
only while underlying pointers do not change. */
|
|
static struct scan {
|
|
/* `re2c` variables; these point directly into `buffer`. */
|
|
const char *marker, *ctx_marker, *from, *cursor;
|
|
/* Weird `c2re` stuff: these fields have to come after when >5? */
|
|
const char *label, *buffer;
|
|
enum condition condition;
|
|
size_t line;
|
|
int is_ws_expected, is_source;
|
|
#define X(n) EXPECT_ ## n
|
|
/* "[something: expect; there; to; be; args]", in this case, `size = 5` and
|
|
expect would be 5 `EXPECT_KEYWORD`. */
|
|
struct { unsigned size; enum { EXPECT } expect[20]; } command;
|
|
#undef X
|
|
} scan; /* Terrible, gah. */
|
|
|
|
void lex_reset(const char *const buffer) {
|
|
scan.marker = scan.ctx_marker = scan.from = scan.cursor = scan.label
|
|
= scan.buffer = buffer;
|
|
scan.condition = 0;
|
|
scan.line = 1;
|
|
}
|
|
|
|
/*!re2c
// Bind the generated lexer to the file-level `scan` state.
re2c:flags:tags = 1;
re2c:define:YYCURSOR = scan.cursor;
re2c:define:YYMARKER = scan.marker;
re2c:define:YYCTXMARKER = scan.ctx_marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 'scan.condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;

// Eof is marked by null when preparing files for lexing.
// Mutually exclusive; only !, [, are not covered.
end = "\x00";
newline = "\n" | "\r" "\n"?;
ws = [ \t\v\f];
glyph = [^ \t\n\r\v\f![\x00];
glyphs = glyph+;

// inside the block
natural = [1-9][0-9]*;
// The optional sign applies to every alternative, so negative whole numbers
// such as -73 are accepted as well as negative fractions.
decimal = "-"? (([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]);
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
// year-month-day; the day field has to admit 20 through 31 too.
date = "-"? natural "-" [0-1][0-9] "-" [0-3][0-9];
*/
|
|
|
|
static void expect_pop(void) {
|
|
printf("<expect_pop>");
|
|
if(!scan.command.size) { printf("nostack\n"); scan.condition = yyccommand_end; return; }
|
|
switch(scan.command.expect[--scan.command.size]) {
|
|
case EXPECT_KEYWORD: printf("keyword\n");scan.condition = yyccommand_keyword; break;
|
|
case EXPECT_DATE: printf("date\n");scan.condition = yyccommand_date; break;
|
|
case EXPECT_FREEFORM: printf("freeform\n");scan.condition = yyccommand_freeform; break;
|
|
}
|
|
}
|
|
|
|
int lex_next(struct lex *const x) {
|
|
const char *s0, *s1;
|
|
const size_t prev_line = scan.line;
|
|
/*!stags:re2c format = 'const char *@@;\n'; */
|
|
assert(x);
|
|
if(!scan.buffer) return 0;
|
|
x->s0 = x->s1 = 0;
|
|
x->line = prev_line;
|
|
x->ws_before = 0;
|
|
x->new_paragraph = 0;
|
|
scan:
|
|
/*!re2c
|
|
<text> end { printf("end\n");return x->symbol = END, 0; }
|
|
<image, command, command_keyword, command_date, command_end> end
|
|
{ return x->symbol = ERROR, 0; }
|
|
|
|
<text> newline
|
|
{ x->line = ++scan.line;
|
|
x->new_paragraph = prev_line + 2 >= scan.line;
|
|
x->ws_before = 1;
|
|
goto scan; }
|
|
<text> ws+ { x->ws_before = 1; goto scan; }
|
|
<text> @s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1;
|
|
return x->symbol = TEXT, 1; }
|
|
<text> @s0 "!" @s1 { x->s0 = s0, x->s1 = s1;
|
|
return x->symbol = BANG, 1; }
|
|
<text> "\\" @s0 "[" @s1 { x->s0 = s0, x->s1 = s1;
|
|
return x->symbol = BRACKET, 1; }
|
|
<text> "![" :=> image
|
|
<text> "[" :=> command
|
|
|
|
<image> ws* "osm" ws* "](geo:" @s0 decimal "," @s1 decimal ")" => text {
|
|
x->symbol = MAP, x->s0 = s0, x->s1 = s1;
|
|
printf("Got a map.\n");
|
|
return 1;
|
|
}
|
|
<image> * { printf("image(broken)\n");return 0; }
|
|
|
|
// source
|
|
<command> "source"
|
|
{ if(scan.is_ws_expected || scan.command.size)
|
|
return x->symbol = ERROR, 0;
|
|
scan.is_ws_expected = 1, scan.is_source = 1;
|
|
scan.command.size = 2;
|
|
scan.command.expect[1] = EXPECT_KEYWORD;
|
|
scan.command.expect[0] = EXPECT_FREEFORM;
|
|
return x->symbol = SOURCE, 1; }
|
|
<command> "default"
|
|
{ if(scan.is_ws_expected || !scan.is_source)
|
|
return x->symbol = ERROR, 0;
|
|
scan.is_ws_expected = 1, scan.is_source = 0;
|
|
return x->symbol = DEFAULT, 1; }
|
|
|
|
// score
|
|
<command> "significant"
|
|
{ if(scan.is_ws_expected || scan.command.size)
|
|
return x->symbol = ERROR, 0;
|
|
scan.is_ws_expected = 1;
|
|
scan.command.size = 2;
|
|
scan.command.expect[1] = EXPECT_FREEFORM;
|
|
scan.command.expect[0] = EXPECT_DATE;
|
|
return x->symbol = SIGNIFICANT, 1; }
|
|
<command> @s0 natural @s1
|
|
{ if(scan.is_ws_expected || scan.command.size)
|
|
return x->symbol = ERROR, 0;
|
|
scan.is_ws_expected = 1;
|
|
x->s0 = s0, x->s1 = s1;
|
|
return x->symbol = SCORE, 1; }
|
|
|
|
// general command stuff
|
|
<command> ws+ { scan.is_ws_expected = 0; goto scan; }
|
|
<command> ":"
|
|
{ if(!scan.command.size) return x->symbol = ERROR, 0;
|
|
scan.is_ws_expected = 0, scan.is_source = 0;
|
|
expect_pop(); goto scan; }
|
|
<command_keyword> ws* @s0 id @s1 ws* ";"? / "]"?
|
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
|
return x->symbol = ARG_KEYWORD, 1; }
|
|
<command_date> ws* @s0 date @s1 ws* ";"? / "]"?
|
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
|
return x->symbol = ARG_DATE, 1; }
|
|
<command_freeform> ws* @s0 [^\t\n\r\v\f;[\]\x00]* @s1 ws* ";"? / "]"?
|
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
|
return x->symbol = ARG_FREEFORM, 1; }
|
|
<command, command_end> "]" => text
|
|
{ printf("]\n"); if(scan.command.size) { printf("huh?\n"); return 0; }
|
|
goto scan;
|
|
}
|
|
<command, command_keyword, command_date, command_freeform, command_end> *
|
|
{ return x->symbol = ERROR, 0; }
|
|
*/
|
|
assert(0);
|
|
}
|