Cleaned up. text->expect_line in []\n.

This commit is contained in:
Neil 2022-02-15 21:21:53 -08:00
parent bac29812ec
commit e8a5d21558

View File

@ -17,6 +17,7 @@ re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char; re2c:define:YYCTYPE = char;
*/ */
/** "-"? [1-9][0-9]*$, within the range of `INT_MAX`. */
int lex_looks_like_year(const char *const a, int *const year) { int lex_looks_like_year(const char *const a, int *const year) {
const char *YYCURSOR = a, *YYMARKER = a, *s0; const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */ /*!stags:re2c format = 'const char *@@;\n'; */
@ -38,6 +39,7 @@ int lex_looks_like_year(const char *const a, int *const year) {
*/ */
} }
/** 1 <= [0-1][0-9]$ <= 12 */
int lex_looks_like_month(const char *const a) { int lex_looks_like_month(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0; const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */ /*!stags:re2c format = 'const char *@@;\n'; */
@ -52,6 +54,7 @@ int lex_looks_like_month(const char *const a) {
*/ */
} }
/** 1 <= [0-3][0-9].txt$ <= 31 */
int lex_looks_like_day(const char *const a) { int lex_looks_like_day(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0; const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */ /*!stags:re2c format = 'const char *@@;\n'; */
@ -76,8 +79,9 @@ int lex_looks_like_day(const char *const a) {
#define EXPECT_CONS Y(freeform, FREEFORM) #define EXPECT_CONS Y(freeform, FREEFORM)
#define EXPECT EXPECT_HEAD EXPECT_CONS #define EXPECT EXPECT_HEAD EXPECT_CONS
/** scanner reads a file and extracts semantic information. Valid to access /** Scan reads one file at a time and extracts semantic information. Valid to
only while underlying pointers do not change. */ access only while underlying pointers do not change. This is a singleton, not
concurrent: convenient and bad. */
static struct scan { static struct scan {
/* `re2c` variables; these point directly into `buffer`. */ /* `re2c` variables; these point directly into `buffer`. */
const char *marker, *ctx_marker, *from, *cursor; const char *marker, *ctx_marker, *from, *cursor;
@ -91,7 +95,7 @@ static struct scan {
struct { unsigned size; enum { EXPECT } expect[16]; } edict; struct { unsigned size; enum { EXPECT } expect[16]; } edict;
#undef X #undef X
#undef Y #undef Y
} scan; /* Not suited for concurrency. Simple. */ } scan;
/** Resets the buffer to some `buffer`. */ /** Resets the buffer to some `buffer`. */
void lex_reset(const char *const buffer) { void lex_reset(const char *const buffer) {
@ -129,7 +133,7 @@ int lex_next(struct lex *const x) {
illegal = [\x01-\x08\x0a-\x1f\x7f]; // unix-style control characters illegal = [\x01-\x08\x0a-\x1f\x7f]; // unix-style control characters
newline = "\n"; newline = "\n";
ws = [ \t]; ws = [ \t];
glyph = [^\x00-\x1f \x7f]; glyph = [^] \ (sentinel | illegal | newline | ws);
keyword = [a-zA-Z_][a-zA-Z0-9_\-]{0,63}; keyword = [a-zA-Z_][a-zA-Z0-9_\-]{0,63};
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
natural = [1-9][0-9]*; natural = [1-9][0-9]*;
@ -144,18 +148,17 @@ scan:
/*!re2c /*!re2c
<*> illegal { return x->symbol = ILLEGAL, 0; } <*> illegal { return x->symbol = ILLEGAL, 0; }
<*> * { return x->symbol = SYNTAX, 0; } <*> * { return x->symbol = SYNTAX, 0; }
<line> sentinel { return x->symbol = END, 0; } <*> sentinel
<text, expect_line, expect_caption, text, image, edict, edict_keyword, edict_date, edict_freeform, edict_end> { return x->symbol = scan.condition == yycline ? END : ILLEGAL, 0; }
sentinel { return x->symbol = ILLEGAL, 0; }
<expect_line> newline => line { x->line = ++scan.line; goto scan; } <expect_line> newline => line { x->line = ++scan.line; goto scan; }
<expect_caption> ws* @s0 glyph (glyph | ws)* @s1 ws* / newline
=> expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = CAPTION, 1; }
<line> newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; } <line> newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; }
<line> "--" :=> source <line> "--" :=> source
<line> "->" :=> location <line> "->" :=> location
<line> "[" :=> edict <line> "[" :=> edict
<line> "" / glyph :=> text <line> "" / glyph :=> text
<expect_caption> ws* @s0 ([^] \ (sentinel | illegal | newline | ws))
([^] \ (sentinel | illegal | newline))* @s1 ws* / newline => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = CAPTION, 1; }
<text> newline => line { x->line = ++scan.line; goto scan; } <text> newline => line { x->line = ++scan.line; goto scan; }
<text> ws+ { goto scan; } <text> ws+ { goto scan; }
@ -169,9 +172,6 @@ scan:
<location> @s0 keyword @s1 => expect_line <location> @s0 keyword @s1 => expect_line
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; } { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION_RECALL, 1; }
// This was getting a bit tiresome. Sorry compatibility with Diary.
// I like the dd*mm'ss.ss"N way better, but it is a pain to type.
//<map> "![" ws* "osm" ws* "](geo:" @s0 decimal "," @s1 decimal ")" ws*
<map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption <map> "(" @s0 decimal "," @s1 decimal ")" => expect_caption
{ x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; } { x->s0 = s0, x->s1 = s1; return x->symbol = LOCATION, 1; }
@ -225,7 +225,7 @@ scan:
@s1 ws* ";"? @s1 ws* ";"?
{ x->s0 = s0, x->s1 = s1; expect_pop(); { x->s0 = s0, x->s1 = s1; expect_pop();
return x->symbol = ARG_FREEFORM, 1; } return x->symbol = ARG_FREEFORM, 1; }
<edict, edict_end> "]" => text <edict, edict_end> "]" => expect_line
{ if(scan.edict.size) return 0; goto scan; } { if(scan.edict.size) return 0; goto scan; }
<edict, edict_keyword, edict_date, edict_freeform, edict_end> * <edict, edict_keyword, edict_date, edict_freeform, edict_end> *
{ return x->symbol = SYNTAX, 0; } { return x->symbol = SYNTAX, 0; }