From 4ad9a13c733584ff3993fe42eb17fed564900a0d Mon Sep 17 00:00:00 2001 From: Neil Date: Sat, 12 Feb 2022 22:35:02 -0800 Subject: [PATCH] Split it. --- src/{interpret.re_c.c => interpret.c} | 147 ++------------------------ src/interpret.h | 1 + src/lex.h | 6 ++ src/lex.re_c.c | 135 +++++++++++++++++++++++ 4 files changed, 150 insertions(+), 139 deletions(-) rename src/{interpret.re_c.c => interpret.c} (50%) create mode 100644 src/interpret.h create mode 100644 src/lex.h create mode 100644 src/lex.re_c.c diff --git a/src/interpret.re_c.c b/src/interpret.c similarity index 50% rename from src/interpret.re_c.c rename to src/interpret.c index 182b889..882d042 100644 --- a/src/interpret.re_c.c +++ b/src/interpret.c @@ -1,137 +1,12 @@ -/** @license 2022 Neil Edelman, distributed under the terms of the - [MIT License](https://opensource.org/licenses/MIT). - - Lexer for journal entries. - - @std C89/90 */ - -#include -#include -#include +#include "interpret.h" +#include "lex.h" +#include /* chdir (POSIX) */ +#include /* mode_t (POSIX) */ +#include /* umask (POSIX) */ +#include /* opendir readdir closedir */ #include -#include - -/*!re2c -re2c:yyfill:enable = 0; -re2c:define:YYCTYPE = char; -*/ - -static int looks_like_year(const char *const a, int *const year) { - const char *YYCURSOR = a, *YYMARKER = a, *s0; - /*!stags:re2c format = 'const char *@@;\n'; */ - assert(a && year); - /*!re2c - @s0 ("-"? [1-9][0-9]* | "0") "\x00" { - int sign = 1, mag; - if(*s0 == '-') { sign = -1; s0++; } - for(mag = 0; *s0 != '\0'; s0++) { - int d = *s0 - '0'; - if((INT_MAX - d) / 10 < mag) return 0; - mag = mag * 10 + d; - } - *year = sign * mag; - return 1; - } - * { return 0; } - */ -} - -static int looks_like_month(const char *const a) { - const char *YYCURSOR = a, *YYMARKER = a, *s0; - /*!stags:re2c format = 'const char *@@;\n'; */ - assert(a); - /*!re2c - @s0 [0-1][0-9] "\x00" { - int val = 10 * (s0[0] - '0') + (s0[1] - '0'); - return val < 1 || val > 12 ? 
0 : val; - } - * { return 0; } - */ -} - -static int looks_like_day(const char *const a) { - const char *YYCURSOR = a, *YYMARKER = a, *s0; - /*!stags:re2c format = 'const char *@@;\n'; */ - assert(a); - /*!re2c - @s0 [0-3][0-9] ".txt\x00" { - int val = 10 * (s0[0] - '0') + (s0[1] - '0'); - return val < 1 || val > 31 ? 0 : val; - } - * { return 0; } - */ -} - -/* This defines `enum condition`. */ -/*!types:re2c*/ -enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP }; - -/** scanner reads a file and extracts semantic information. Valid to access - only while underlying pointers do not change. */ -struct scanner { - /* `re2c` variables; these point directly into `buffer`. */ - const char *marker, *ctx_marker, *from, *cursor; - /* Weird `c2re` stuff: these fields have to come after when >5? */ - const char *label, *buffer, *s0, *s1; - enum condition condition; - enum symbol symbol; - size_t line; - int ws_before; -}; - -/*!re2c -re2c:flags:tags = 1; -re2c:define:YYCURSOR = s->cursor; -re2c:define:YYMARKER = s->marker; -re2c:define:YYCTXMARKER = s->ctx_marker; -re2c:define:YYCONDTYPE = 'condition'; -re2c:define:YYGETCONDITION = 's->condition'; -re2c:define:YYGETCONDITION:naked = 1; -re2c:define:YYSETCONDITION = 's->condition = @@;'; -re2c:define:YYSETCONDITION:naked = 1; - -// Eof is marked by null when preparing files for lexing. -// Mutually exclusive; only !, [, are not covered. -end = "\x00"; -newline = "\n" | "\r" "\n"?; -ws = [ \t\v\f]; -glyph = [^ \t\n\r\v\f![\x00]; -glyphs = glyph+; - -// inside the block -decimal = [1-9][0-9]*; -number = ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; -id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63}; -*/ - -static int lex(struct scanner *const s) { - const char *s0, *s1; - /*!stags:re2c format = 'const char *@@;\n'; */ - s->ws_before = 0; -scan: - /*!re2c - end { return s->symbol = END, 1; } - // fixme: paragraphs. 
- newline { s->line++; s->ws_before = 1; goto scan; } - ws+ { s->ws_before = 1; goto scan; } - @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = TEXT, 1; } - @s0 "!" @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = BANG, 1; } - "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = BRACKET, 1; } - "![" :=> image - "[" :=> command - * { return 0; } - ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" { - s->condition = yyctext; - s->s0 = s0, s->s1 = s1; - printf("Got a map.\n"); - return 1; - } - * { return 0; } - */ -} +#include +#include #if INT_MAX >= 100000000000 #error int_to_string requires truncation on this compiler. @@ -149,12 +24,6 @@ static int int_cmp(const int *const a, const int *const b) static int void_int_cmp(const void *const a, const void *const b) { return int_cmp(a, b); } -#include /* chdir (POSIX) */ -#include /* mode_t (POSIX) */ -#include /* umask (POSIX) */ -#include /* opendir readdir closedir */ -#include - int main(int argc, char **argv) { int success = EXIT_FAILURE; DIR *dir = 0; diff --git a/src/interpret.h b/src/interpret.h new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/interpret.h @@ -0,0 +1 @@ + diff --git a/src/lex.h b/src/lex.h new file mode 100644 index 0000000..79273d2 --- /dev/null +++ b/src/lex.h @@ -0,0 +1,6 @@ +int looks_like_year(const char *, int *); +int looks_like_month(const char *); +int looks_like_day(const char *); + +struct scanner; +int lex(struct scanner *const s); diff --git a/src/lex.re_c.c b/src/lex.re_c.c new file mode 100644 index 0000000..57f76d1 --- /dev/null +++ b/src/lex.re_c.c @@ -0,0 +1,135 @@ +/** @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). + + Lexer for journal entries. 
+ + @std C89/90 */ + +#include "../src/lex.h" +#include +#include +#include +#include +#include + +/*!re2c +re2c:yyfill:enable = 0; +re2c:define:YYCTYPE = char; +*/ + +int looks_like_year(const char *const a, int *const year) { + const char *YYCURSOR = a, *YYMARKER = a, *s0; + /*!stags:re2c format = 'const char *@@;\n'; */ + assert(a && year); + /*!re2c + @s0 ("-"? [1-9][0-9]* | "0") "\x00" { + int sign = 1, mag; + if(*s0 == '-') { sign = -1; s0++; } + for(mag = 0; *s0 != '\0'; s0++) { + int d = *s0 - '0'; + if((INT_MAX - d) / 10 < mag) return 0; + mag = mag * 10 + d; + } + *year = sign * mag; + return 1; + } + * { return 0; } + */ +} + +int looks_like_month(const char *const a) { + const char *YYCURSOR = a, *YYMARKER = a, *s0; + /*!stags:re2c format = 'const char *@@;\n'; */ + assert(a); + /*!re2c + @s0 [0-1][0-9] "\x00" { + int val = 10 * (s0[0] - '0') + (s0[1] - '0'); + return val < 1 || val > 12 ? 0 : val; + } + * { return 0; } + */ +} + +int looks_like_day(const char *const a) { + const char *YYCURSOR = a, *YYMARKER = a, *s0; + /*!stags:re2c format = 'const char *@@;\n'; */ + assert(a); + /*!re2c + @s0 [0-3][0-9] ".txt\x00" { + int val = 10 * (s0[0] - '0') + (s0[1] - '0'); + return val < 1 || val > 31 ? 0 : val; + } + * { return 0; } + */ +} + +/* This defines `enum condition`. */ +/*!types:re2c*/ +enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP }; + +/** scanner reads a file and extracts semantic information. Valid to access + only while underlying pointers do not change. */ +struct scanner { + /* `re2c` variables; these point directly into `buffer`. */ + const char *marker, *ctx_marker, *from, *cursor; + /* Weird `re2c` stuff: these fields have to come after when >5? 
*/ + const char *label, *buffer, *s0, *s1; + enum condition condition; + enum symbol symbol; + size_t line; + int ws_before; +}; + +/*!re2c +re2c:flags:tags = 1; +re2c:define:YYCURSOR = s->cursor; +re2c:define:YYMARKER = s->marker; +re2c:define:YYCTXMARKER = s->ctx_marker; +re2c:define:YYCONDTYPE = 'condition'; +re2c:define:YYGETCONDITION = 's->condition'; +re2c:define:YYGETCONDITION:naked = 1; +re2c:define:YYSETCONDITION = 's->condition = @@;'; +re2c:define:YYSETCONDITION:naked = 1; + +// Eof is marked by null when preparing files for lexing. +// Mutually exclusive; only !, [, are not covered. +end = "\x00"; +newline = "\n" | "\r" "\n"?; +ws = [ \t\v\f]; +glyph = [^ \t\n\r\v\f![\x00]; +glyphs = glyph+; + +// inside the block +decimal = [1-9][0-9]*; +number = ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0]; +id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63}; +*/ + +int lex(struct scanner *const s) { + const char *s0, *s1; + /*!stags:re2c format = 'const char *@@;\n'; */ + s->ws_before = 0; +scan: + /*!re2c + end { return s->symbol = END, 1; } + // fixme: paragraphs. + newline { s->line++; s->ws_before = 1; goto scan; } + ws+ { s->ws_before = 1; goto scan; } + @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1; + return s->symbol = TEXT, 1; } + @s0 "!" @s1 { s->s0 = s0, s->s1 = s1; + return s->symbol = BANG, 1; } + "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1; + return s->symbol = BRACKET, 1; } + "![" :=> image + "[" :=> command + * { return 0; } + ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" { + s->condition = yyctext; + s->s0 = s0, s->s1 = s1; + printf("Got a map.\n"); + return 1; + } + * { return 0; } + */ +}