Stricter: separated the "we've just seen a newline" state from general text.
This commit is contained in:
parent
4fdb66076a
commit
fb814b7223
@ -15,9 +15,9 @@ static void int_to_string(const int *const n,
|
|||||||
#define ARRAY_NAME int
|
#define ARRAY_NAME int
|
||||||
#define ARRAY_TYPE int
|
#define ARRAY_TYPE int
|
||||||
#define ARRAY_EXPECT_TRAIT
|
#define ARRAY_EXPECT_TRAIT
|
||||||
#include "../src/array.h"
|
#include "array.h"
|
||||||
#define ARRAY_TO_STRING &int_to_string
|
#define ARRAY_TO_STRING &int_to_string
|
||||||
#include "../src/array.h"
|
#include "array.h"
|
||||||
static int int_cmp(const int *const a, const int *const b)
|
static int int_cmp(const int *const a, const int *const b)
|
||||||
{ return (*b < *a) - (*a < *b); }
|
{ return (*b < *a) - (*a < *b); }
|
||||||
static int void_int_cmp(const void *const a, const void *const b)
|
static int void_int_cmp(const void *const a, const void *const b)
|
||||||
|
@ -4,10 +4,11 @@ int lex_looks_like_year(const char *, int *);
|
|||||||
int lex_looks_like_month(const char *);
|
int lex_looks_like_month(const char *);
|
||||||
int lex_looks_like_day(const char *);
|
int lex_looks_like_day(const char *);
|
||||||
|
|
||||||
#define LEX_SYMBOL X(END), X(ERROR), X(TEXT), X(BANG), X(BRACKET), X(WHITE), \
|
#define LEX_SYMBOL \
|
||||||
X(MAP), \
|
/* Results. */ X(END), X(SYNTAX), X(ILLEGAL), X(NOT_FOUND), \
|
||||||
/* Commands */ X(SOURCE), X(DEFAULT), X(SIGNIFICANT), X(SCORE), \
|
/* Text */ X(PARAGRAPH), X(TEXT), \
|
||||||
/* Arguments */ X(ARG_KEYWORD), X(ARG_DATE), X(ARG_NATURAL), X(ARG_FREEFORM)
|
/* Directive. */ X(SOURCE), X(DEFAULT), X(SIGNIFICANT), X(SCORE), X(MAP), \
|
||||||
|
/* Arguments. */ X(ARG_KEYWORD), X(ARG_DATE), X(ARG_NATURAL), X(ARG_FREEFORM)
|
||||||
|
|
||||||
#define X(n) n
|
#define X(n) n
|
||||||
struct lex {
|
struct lex {
|
||||||
|
135
src/lex.re_c.c
135
src/lex.re_c.c
@ -20,6 +20,7 @@ re2c:define:YYCTYPE = char;
|
|||||||
int lex_looks_like_year(const char *const a, int *const year) {
|
int lex_looks_like_year(const char *const a, int *const year) {
|
||||||
const char *YYCURSOR = a, *YYMARKER = a, *s0;
|
const char *YYCURSOR = a, *YYMARKER = a, *s0;
|
||||||
/*!stags:re2c format = 'const char *@@;\n'; */
|
/*!stags:re2c format = 'const char *@@;\n'; */
|
||||||
|
(void)yyt1;
|
||||||
assert(a && year);
|
assert(a && year);
|
||||||
/*!re2c
|
/*!re2c
|
||||||
@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
|
@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
|
||||||
@ -81,7 +82,7 @@ static struct scan {
|
|||||||
#define X(n) EXPECT_ ## n
|
#define X(n) EXPECT_ ## n
|
||||||
/* "[something: expect; there; to; be; args]", in this case, expect would
|
/* "[something: expect; there; to; be; args]", in this case, expect would
|
||||||
be a stack of `size = 5` `EXPECT_KEYWORD`. */
|
be a stack of `size = 5` `EXPECT_KEYWORD`. */
|
||||||
struct { unsigned size; enum { EXPECT } expect[16]; } command;
|
struct { unsigned size; enum { EXPECT } expect[16]; } edict;
|
||||||
#undef X
|
#undef X
|
||||||
} scan; /* Terrible, gah. */
|
} scan; /* Terrible, gah. */
|
||||||
|
|
||||||
@ -92,6 +93,19 @@ void lex_reset(const char *const buffer) {
|
|||||||
scan.line = 1;
|
scan.line = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** I don't think `re2c` supports branching on variable conditions.
|
||||||
|
It does now? */
|
||||||
|
static void expect_pop(void) {
|
||||||
|
printf("<expect_pop>");
|
||||||
|
if(!scan.edict.size) { printf("allfinished\n"); scan.condition = yycedict_end; return; }
|
||||||
|
switch(scan.edict.expect[--scan.edict.size]) {
|
||||||
|
case EXPECT_KEYWORD: printf("keyword\n");scan.condition = yycedict_keyword; break;
|
||||||
|
case EXPECT_DATE: printf("date\n");scan.condition = yycedict_date; break;
|
||||||
|
case EXPECT_FREEFORM: printf("freeform\n");scan.condition = yycedict_freeform; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int lex_next(struct lex *const x) {
|
||||||
/*!re2c
|
/*!re2c
|
||||||
re2c:flags:tags = 1;
|
re2c:flags:tags = 1;
|
||||||
re2c:define:YYCURSOR = scan.cursor;
|
re2c:define:YYCURSOR = scan.cursor;
|
||||||
@ -102,35 +116,7 @@ re2c:define:YYGETCONDITION = 'scan.condition';
|
|||||||
re2c:define:YYGETCONDITION:naked = 1;
|
re2c:define:YYGETCONDITION:naked = 1;
|
||||||
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
|
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
|
||||||
re2c:define:YYSETCONDITION:naked = 1;
|
re2c:define:YYSETCONDITION:naked = 1;
|
||||||
|
|
||||||
// Eof is marked by null when preparing files for lexing.
|
|
||||||
// Mutually exclusive; only !, [, are not covered.
|
|
||||||
end = "\x00";
|
|
||||||
newline = "\n" | "\r" "\n"?;
|
|
||||||
ws = [ \t\v\f];
|
|
||||||
glyph = [^ \t\n\r\v\f![\x00];
|
|
||||||
glyphs = glyph+;
|
|
||||||
|
|
||||||
// inside the block
|
|
||||||
natural = [1-9][0-9]*;
|
|
||||||
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
|
|
||||||
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
|
|
||||||
date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9];
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** I don't think `re2c` supports branching on variable conditions.
|
|
||||||
It does now? */
|
|
||||||
static void expect_pop(void) {
|
|
||||||
printf("<expect_pop>");
|
|
||||||
if(!scan.command.size) { printf("allfinished\n"); scan.condition = yyccommand_end; return; }
|
|
||||||
switch(scan.command.expect[--scan.command.size]) {
|
|
||||||
case EXPECT_KEYWORD: printf("keyword\n");scan.condition = yyccommand_keyword; break;
|
|
||||||
case EXPECT_DATE: printf("date\n");scan.condition = yyccommand_date; break;
|
|
||||||
case EXPECT_FREEFORM: printf("freeform\n");scan.condition = yyccommand_freeform; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int lex_next(struct lex *const x) {
|
|
||||||
const char *s0, *s1;
|
const char *s0, *s1;
|
||||||
const size_t prev_line = scan.line;
|
const size_t prev_line = scan.line;
|
||||||
/*!stags:re2c format = 'const char *@@;\n'; */
|
/*!stags:re2c format = 'const char *@@;\n'; */
|
||||||
@ -142,24 +128,27 @@ int lex_next(struct lex *const x) {
|
|||||||
x->new_paragraph = 0;
|
x->new_paragraph = 0;
|
||||||
scan:
|
scan:
|
||||||
/*!re2c
|
/*!re2c
|
||||||
<text> end { printf("end\n");return x->symbol = END, 0; }
|
end = "\x00";
|
||||||
<image, command, command_keyword, command_date, command_end> end
|
illegal = [\x01-\x08\x0a-\x1f\x7f]; // unix-style control characters
|
||||||
{ return x->symbol = ERROR, 0; }
|
newline = "\n";
|
||||||
|
ws = [ \t];
|
||||||
|
glyph = [^\x00-\x1f \x7f];
|
||||||
|
<*> illegal { return x->symbol = ILLEGAL, 0; }
|
||||||
|
<line> end { return x->symbol = END, 0; }
|
||||||
|
<text, text, image, edict, edict_keyword, edict_date, edict_freeform, edict_end>
|
||||||
|
end { return x->symbol = ILLEGAL, 0; }
|
||||||
|
<line> newline { x->line = ++scan.line; return x->symbol = PARAGRAPH, 1; }
|
||||||
|
<line> "![" :=> image
|
||||||
|
<line> "[" :=> edict
|
||||||
|
<line> "" / glyph :=> text
|
||||||
|
<line> * { return x->symbol = SYNTAX, 1; }
|
||||||
|
|
||||||
<text> newline
|
<text> newline => line { x->line = ++scan.line; goto scan; }
|
||||||
{ x->line = ++scan.line;
|
<text> ws+ { goto scan; }
|
||||||
x->new_paragraph = prev_line + 2 >= scan.line;
|
|
||||||
x->ws_before = 1;
|
|
||||||
goto scan; }
|
|
||||||
<text> ws+ { x->ws_before = 1; goto scan; }
|
|
||||||
<text> @s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1;
|
<text> @s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1;
|
||||||
return x->symbol = TEXT, 1; }
|
return x->symbol = TEXT, 1; }
|
||||||
<text> @s0 "!" @s1 { x->s0 = s0, x->s1 = s1;
|
|
||||||
return x->symbol = BANG, 1; }
|
decimal = "-"? ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
|
||||||
<text> "\\" @s0 "[" @s1 { x->s0 = s0, x->s1 = s1;
|
|
||||||
return x->symbol = BRACKET, 1; }
|
|
||||||
<text> "" => text {
|
<image> ws* "osm" ws* "](geo:" @s0 decimal "," @s1 decimal ")" => text {
|
||||||
x->symbol = MAP, x->s0 = s0, x->s1 = s1;
|
x->symbol = MAP, x->s0 = s0, x->s1 = s1;
|
||||||
@ -168,58 +157,60 @@ scan:
|
|||||||
}
|
}
|
||||||
<image> * { printf("image(broken)\n");return 0; }
|
<image> * { printf("image(broken)\n");return 0; }
|
||||||
|
|
||||||
|
natural = [1-9][0-9]*;
|
||||||
|
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
|
||||||
|
date = "-"? natural "-" [0-1][0-9] "-" [0-1][0-9];
|
||||||
// source
|
// source
|
||||||
<command> "source"
|
<edict> "source"
|
||||||
{ if(scan.is_ws_expected || scan.command.size)
|
{ if(scan.is_ws_expected || scan.edict.size)
|
||||||
return x->symbol = ERROR, 0;
|
return x->symbol = SYNTAX, 0;
|
||||||
scan.is_ws_expected = 1, scan.is_source = 1;
|
scan.is_ws_expected = 1, scan.is_source = 1;
|
||||||
scan.command.size = 2;
|
scan.edict.size = 2;
|
||||||
scan.command.expect[1] = EXPECT_KEYWORD;
|
scan.edict.expect[1] = EXPECT_KEYWORD;
|
||||||
scan.command.expect[0] = EXPECT_FREEFORM;
|
scan.edict.expect[0] = EXPECT_FREEFORM;
|
||||||
return x->symbol = SOURCE, 1; }
|
return x->symbol = SOURCE, 1; }
|
||||||
<command> "default"
|
<edict> "default"
|
||||||
{ if(scan.is_ws_expected || !scan.is_source)
|
{ if(scan.is_ws_expected || !scan.is_source)
|
||||||
return x->symbol = ERROR, 0;
|
return x->symbol = SYNTAX, 0;
|
||||||
scan.is_ws_expected = 1, scan.is_source = 0;
|
scan.is_ws_expected = 1, scan.is_source = 0;
|
||||||
return x->symbol = DEFAULT, 1; }
|
return x->symbol = DEFAULT, 1; }
|
||||||
|
|
||||||
// score
|
// score
|
||||||
<command> "significant"
|
<edict> "significant"
|
||||||
{ if(scan.is_ws_expected || scan.command.size)
|
{ if(scan.is_ws_expected || scan.edict.size)
|
||||||
return x->symbol = ERROR, 0;
|
return x->symbol = SYNTAX, 0;
|
||||||
scan.is_ws_expected = 1;
|
scan.is_ws_expected = 1;
|
||||||
scan.command.size = 2;
|
scan.edict.size = 2;
|
||||||
scan.command.expect[1] = EXPECT_FREEFORM;
|
scan.edict.expect[1] = EXPECT_FREEFORM;
|
||||||
scan.command.expect[0] = EXPECT_DATE;
|
scan.edict.expect[0] = EXPECT_DATE;
|
||||||
return x->symbol = SIGNIFICANT, 1; }
|
return x->symbol = SIGNIFICANT, 1; }
|
||||||
<command> @s0 natural @s1
|
<edict> @s0 natural @s1
|
||||||
{ if(scan.is_ws_expected || scan.command.size)
|
{ if(scan.is_ws_expected || scan.edict.size)
|
||||||
return x->symbol = ERROR, 0;
|
return x->symbol = SYNTAX, 0;
|
||||||
scan.is_ws_expected = 1;
|
scan.is_ws_expected = 1;
|
||||||
x->s0 = s0, x->s1 = s1;
|
x->s0 = s0, x->s1 = s1;
|
||||||
return x->symbol = SCORE, 1; }
|
return x->symbol = SCORE, 1; }
|
||||||
|
|
||||||
// general command stuff
|
<edict> ws+ { scan.is_ws_expected = 0; goto scan; }
|
||||||
<command> ws+ { scan.is_ws_expected = 0; goto scan; }
|
<edict> ":"
|
||||||
<command> ":"
|
{ if(!scan.edict.size) return x->symbol = SYNTAX, 0;
|
||||||
{ if(!scan.command.size) return x->symbol = ERROR, 0;
|
|
||||||
scan.is_ws_expected = 0, scan.is_source = 0;
|
scan.is_ws_expected = 0, scan.is_source = 0;
|
||||||
expect_pop(); goto scan; }
|
expect_pop(); goto scan; }
|
||||||
<command_keyword> ws* @s0 id @s1 ws* ";"? / "]"?
|
<edict_keyword> ws* @s0 id @s1 ws* ";"? / "]"?
|
||||||
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
||||||
return x->symbol = ARG_KEYWORD, 1; }
|
return x->symbol = ARG_KEYWORD, 1; }
|
||||||
<command_date> ws* @s0 date @s1 ws* ";"? / "]"?
|
<edict_date> ws* @s0 date @s1 ws* ";"? / "]"?
|
||||||
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
||||||
return x->symbol = ARG_DATE, 1; }
|
return x->symbol = ARG_DATE, 1; }
|
||||||
<command_freeform> ws* @s0
|
<edict_freeform> ws* @s0
|
||||||
[^ \t\n\r\v\f;[\]\x00][^\t\n\r\v\f;[\]\x00]*[^ \t\n\r\v\f;[\]\x00]*
|
[^ \t\n\r\v\f;[\]\x00][^\t\n\r\v\f;[\]\x00]*[^ \t\n\r\v\f;[\]\x00]*
|
||||||
@s1 ws* ";"? / "]"?
|
@s1 ws* ";"? / "]"?
|
||||||
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
{ x->s0 = s0, x->s1 = s1; expect_pop();
|
||||||
return x->symbol = ARG_FREEFORM, 1; }
|
return x->symbol = ARG_FREEFORM, 1; }
|
||||||
<command, command_end> "]" => text
|
<edict, edict_end> "]" => text
|
||||||
{ if(scan.command.size) return 0;
|
{ if(scan.edict.size) return 0;
|
||||||
goto scan; }
|
goto scan; }
|
||||||
<command, command_keyword, command_date, command_freeform, command_end> *
|
<edict, edict_keyword, edict_date, edict_freeform, edict_end> *
|
||||||
{ return x->symbol = ERROR, 0; }
|
{ return x->symbol = SYNTAX, 0; }
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user