Split it.

This commit is contained in:
Neil 2022-02-12 22:35:02 -08:00
parent 5cd3c7ac6b
commit 4ad9a13c73
4 changed files with 150 additions and 139 deletions

View File

@ -1,137 +1,12 @@
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Lexer for journal entries.
@std C89/90 */
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "interpret.h"
#include "lex.h"
#include <unistd.h> /* chdir (POSIX) */
#include <sys/types.h> /* mode_t (POSIX) */
#include <sys/stat.h> /* umask (POSIX) */
#include <dirent.h> /* opendir readdir closedir */
#include <limits.h>
#include <errno.h>
/* re2c settings: buffer refilling (YYFILL) is switched off and input is read
 as `char`. The matchers that follow put the terminating null in their
 patterns instead of relying on a refill check. */
/*!re2c
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
*/
/** Tests whether the whole null-terminated string `a` is a year: either a
 lone `0`, or an optional `-` followed by decimal digits with no leading zero.
 The digits are accumulated by hand, and the value is rejected before the
 multiply-and-add could exceed `INT_MAX`.
 `a` and `year` must not be null (asserted).
 @return 1 if `a` matched and fits in an `int`, with the signed value stored
 in `*year`; 0 otherwise, in which case `*year` is not written. */
static int looks_like_year(const char *const a, int *const year) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a && year);
/*!re2c
@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
int sign = 1, mag;
if(*s0 == '-') { sign = -1; s0++; }
for(mag = 0; *s0 != '\0'; s0++) {
int d = *s0 - '0';
if((INT_MAX - d) / 10 < mag) return 0;
mag = mag * 10 + d;
}
*year = sign * mag;
return 1;
}
* { return 0; }
*/
}
/** Tests whether the whole null-terminated string `a` is a two-digit month.
 The pattern accepts a first digit of 0 or 1 and any second digit; the range
 check then rules out `00` and `13` to `19`.
 `a` must not be null (asserted).
 @return The month number, 1 to 12, or 0 if `a` is not a month. */
static int looks_like_month(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
/*!re2c
@s0 [0-1][0-9] "\x00" {
int val = 10 * (s0[0] - '0') + (s0[1] - '0');
return val < 1 || val > 12 ? 0 : val;
}
* { return 0; }
*/
}
/** Tests whether the whole null-terminated string `a` is a day file name: two
 digits, the first being 0 to 3, immediately followed by `.txt`. The range
 check then rules out `00` and `32` to `39`.
 `a` must not be null (asserted).
 @return The day number, 1 to 31, or 0 if `a` is not such a name. */
static int looks_like_day(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
/*!re2c
@s0 [0-3][0-9] ".txt\x00" {
int val = 10 * (s0[0] - '0') + (s0[1] - '0');
return val < 1 || val > 31 ? 0 : val;
}
* { return 0; }
*/
}
/* This defines `enum condition`. */
/*!types:re2c*/
/** Kinds of token; `lex` reports one through the `symbol` field of
 `struct scanner`. */
enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP };
/** scanner reads a file and extracts semantic information. Valid to access
only while underlying pointers do not change. */
struct scanner {
/* `re2c` variables; these point directly into `buffer`. */
const char *marker, *ctx_marker, *from, *cursor;
/* Weird `re2c` stuff: these fields have to come after when >5? */
/* `s0` and `s1` receive the tag positions captured by the rule that `lex`
 matched. */
const char *label, *buffer, *s0, *s1;
/* Current lexer condition; the generated code reads and writes it through
 YYGETCONDITION and YYSETCONDITION. */
enum condition condition;
/* Kind of the most recent token, set by `lex`. */
enum symbol symbol;
/* Incremented by `lex` for every newline it consumes. */
size_t line;
/* Cleared by `lex` on entry; set when it skips whitespace or a newline before
 reaching the token. */
int ws_before;
};
/* re2c configuration for `lex`: tags are enabled; the cursor and both markers
 live in the scanner reached through the local `s`; the condition type is
 `enum condition` and is stored in `s->condition`. The named definitions at
 the end are the character classes and sub-patterns the rules refer to. */
/*!re2c
re2c:flags:tags = 1;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCTXMARKER = s->ctx_marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
// Eof is marked by null when preparing files for lexing.
// Mutually exclusive; only !, [, are not covered.
end = "\x00";
newline = "\n" | "\r" "\n"?;
ws = [ \t\v\f];
glyph = [^ \t\n\r\v\f![\x00];
glyphs = glyph+;
// inside the block
decimal = [1-9][0-9]*;
number = ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
*/
/** Scans the next token of `s`, starting at `s->cursor`.

 In the `text` condition, whitespace and newlines are skipped first; skipping
 sets `s->ws_before`, and every newline increments `s->line`. The token kinds
 reported in `s->symbol` are:
 - END at the terminating null;
 - TEXT for a run of glyphs, spanning `s->s0` to `s->s1`;
 - BANG for a `!` that does not start an image, spanning `s->s0` to `s->s1`;
 - BRACKET for a backslash-escaped `[`, where `s->s0` to `s->s1` spans the
   bracket only;
 - MAP for an image of the form `![osm](geo:<number>,<number>)`, where
   `s->s0` is the start of the first number and `s->s1` the start of the
   second; the condition is then reset to `text`.

 The `command` condition has no rules yet, so an unescaped `[` makes this
 return 0.
 @return 1 if a token was recognised and `s->symbol` set; 0 if the input in
 the `image` or `command` condition was not recognised. */
static int lex(struct scanner *const s) {
	const char *s0, *s1;
	/*!stags:re2c format = 'const char *@@;\n'; */
	s->ws_before = 0;
scan:
	/*!re2c
	<text> end { return s->symbol = END, 1; }
	// fixme: paragraphs.
	<text> newline { s->line++; s->ws_before = 1; goto scan; }
	<text> ws+ { s->ws_before = 1; goto scan; }
	<text> @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = TEXT, 1; }
	<text> @s0 "!" @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = BANG, 1; }
	<text> "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = BRACKET, 1; }
	<text> "![" :=> image
	<text> "[" :=> command
	<image> * { return 0; }
	// Report the map as its own token kind, like every other successful
	// return; otherwise the caller would see the previous token's symbol.
	<image> ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" {
		s->condition = yyctext;
		s->s0 = s0, s->s1 = s1;
		printf("Got a map.\n");
		return s->symbol = MAP, 1;
	}
	<command> * { return 0; }
	*/
}
#include <stdio.h>
#include <stdlib.h>
#if INT_MAX >= 100000000000
#error int_to_string requires truncation on this compiler.
@ -149,12 +24,6 @@ static int int_cmp(const int *const a, const int *const b)
/** Untyped wrapper around `int_cmp`: takes both operands as `const void *`
 and forwards them as pointers to `int`. Presumably this exists so the
 comparison can be passed as a generic comparator callback; confirm at the
 call site.
 @return Whatever `int_cmp` returns for the two pointed-to values. */
static int void_int_cmp(const void *const a, const void *const b) {
	const int *const lhs = a;
	const int *const rhs = b;
	return int_cmp(lhs, rhs);
}
#include <unistd.h> /* chdir (POSIX) */
#include <sys/types.h> /* mode_t (POSIX) */
#include <sys/stat.h> /* umask (POSIX) */
#include <dirent.h> /* opendir readdir closedir */
#include <limits.h>
int main(int argc, char **argv) {
int success = EXIT_FAILURE;
DIR *dir = 0;

1
src/interpret.h Normal file
View File

@ -0,0 +1 @@

6
src/lex.h Normal file
View File

@ -0,0 +1,6 @@
#ifndef LEX_H
#define LEX_H

/* Name matchers; each one tests a whole null-terminated string. */

/* Matches a year: a lone `0`, or an optional `-` followed by digits with no
 leading zero. Returns 1 and stores the value in `*year` when `a` matches and
 fits in an `int`; returns 0 otherwise. */
int looks_like_year(const char *a, int *year);

/* Matches a two-digit month. Returns the month, 1 to 12, or 0 on failure. */
int looks_like_month(const char *a);

/* Matches a two-digit day followed by `.txt`. Returns the day, 1 to 31, or 0
 on failure. */
int looks_like_day(const char *a);

/* Lexer state; the full definition is in the lexer's source file. */
struct scanner;

/* Scans the next token of `s`. Returns 1 if a token was recognised, 0 if the
 input was not. */
int lex(struct scanner *s);

#endif /* LEX_H */

135
src/lex.re_c.c Normal file
View File

@ -0,0 +1,135 @@
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Lexer for journal entries.
@std C89/90 */
#include "../src/lex.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <errno.h>
/* re2c settings: buffer refilling (YYFILL) is switched off and input is read
 as `char`. The matchers that follow put the terminating null in their
 patterns instead of relying on a refill check. */
/*!re2c
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
*/
/** Tests whether the whole null-terminated string `a` is a year: either a
 lone `0`, or an optional `-` followed by decimal digits with no leading zero.
 The digits are accumulated by hand, and the value is rejected before the
 multiply-and-add could exceed `INT_MAX`.
 `a` and `year` must not be null (asserted).
 @return 1 if `a` matched and fits in an `int`, with the signed value stored
 in `*year`; 0 otherwise, in which case `*year` is not written. */
int looks_like_year(const char *const a, int *const year) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a && year);
/*!re2c
@s0 ("-"? [1-9][0-9]* | "0") "\x00" {
int sign = 1, mag;
if(*s0 == '-') { sign = -1; s0++; }
for(mag = 0; *s0 != '\0'; s0++) {
int d = *s0 - '0';
if((INT_MAX - d) / 10 < mag) return 0;
mag = mag * 10 + d;
}
*year = sign * mag;
return 1;
}
* { return 0; }
*/
}
/** Tests whether the whole null-terminated string `a` is a two-digit month.
 The pattern accepts a first digit of 0 or 1 and any second digit; the range
 check then rules out `00` and `13` to `19`.
 `a` must not be null (asserted).
 @return The month number, 1 to 12, or 0 if `a` is not a month. */
int looks_like_month(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
/*!re2c
@s0 [0-1][0-9] "\x00" {
int val = 10 * (s0[0] - '0') + (s0[1] - '0');
return val < 1 || val > 12 ? 0 : val;
}
* { return 0; }
*/
}
/** Tests whether the whole null-terminated string `a` is a day file name: two
 digits, the first being 0 to 3, immediately followed by `.txt`. The range
 check then rules out `00` and `32` to `39`.
 `a` must not be null (asserted).
 @return The day number, 1 to 31, or 0 if `a` is not such a name. */
int looks_like_day(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
/*!re2c
@s0 [0-3][0-9] ".txt\x00" {
int val = 10 * (s0[0] - '0') + (s0[1] - '0');
return val < 1 || val > 31 ? 0 : val;
}
* { return 0; }
*/
}
/* This defines `enum condition`. */
/*!types:re2c*/
/** Kinds of token; `lex` reports one through the `symbol` field of
 `struct scanner`. */
enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP };
/** scanner reads a file and extracts semantic information. Valid to access
only while underlying pointers do not change. */
struct scanner {
/* `re2c` variables; these point directly into `buffer`. */
const char *marker, *ctx_marker, *from, *cursor;
/* Weird `re2c` stuff: these fields have to come after when >5? */
/* `s0` and `s1` receive the tag positions captured by the rule that `lex`
 matched. */
const char *label, *buffer, *s0, *s1;
/* Current lexer condition; the generated code reads and writes it through
 YYGETCONDITION and YYSETCONDITION. */
enum condition condition;
/* Kind of the most recent token, set by `lex`. */
enum symbol symbol;
/* Incremented by `lex` for every newline it consumes. */
size_t line;
/* Cleared by `lex` on entry; set when it skips whitespace or a newline before
 reaching the token. */
int ws_before;
};
/* re2c configuration for `lex`: tags are enabled; the cursor and both markers
 live in the scanner reached through the local `s`; the condition type is
 `enum condition` and is stored in `s->condition`. The named definitions at
 the end are the character classes and sub-patterns the rules refer to. */
/*!re2c
re2c:flags:tags = 1;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCTXMARKER = s->ctx_marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
// Eof is marked by null when preparing files for lexing.
// Mutually exclusive; only !, [, are not covered.
end = "\x00";
newline = "\n" | "\r" "\n"?;
ws = [ \t\v\f];
glyph = [^ \t\n\r\v\f![\x00];
glyphs = glyph+;
// inside the block
decimal = [1-9][0-9]*;
number = ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
*/
/** Scans the next token of `s`, starting at `s->cursor`.

 In the `text` condition, whitespace and newlines are skipped first; skipping
 sets `s->ws_before`, and every newline increments `s->line`. The token kinds
 reported in `s->symbol` are:
 - END at the terminating null;
 - TEXT for a run of glyphs, spanning `s->s0` to `s->s1`;
 - BANG for a `!` that does not start an image, spanning `s->s0` to `s->s1`;
 - BRACKET for a backslash-escaped `[`, where `s->s0` to `s->s1` spans the
   bracket only;
 - MAP for an image of the form `![osm](geo:<number>,<number>)`, where
   `s->s0` is the start of the first number and `s->s1` the start of the
   second; the condition is then reset to `text`.

 The `command` condition has no rules yet, so an unescaped `[` makes this
 return 0.
 @return 1 if a token was recognised and `s->symbol` set; 0 if the input in
 the `image` or `command` condition was not recognised. */
int lex(struct scanner *const s) {
	const char *s0, *s1;
	/*!stags:re2c format = 'const char *@@;\n'; */
	s->ws_before = 0;
scan:
	/*!re2c
	<text> end { return s->symbol = END, 1; }
	// fixme: paragraphs.
	<text> newline { s->line++; s->ws_before = 1; goto scan; }
	<text> ws+ { s->ws_before = 1; goto scan; }
	<text> @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = TEXT, 1; }
	<text> @s0 "!" @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = BANG, 1; }
	<text> "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1;
		return s->symbol = BRACKET, 1; }
	<text> "![" :=> image
	<text> "[" :=> command
	<image> * { return 0; }
	// Report the map as its own token kind, like every other successful
	// return; otherwise the caller would see the previous token's symbol.
	<image> ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" {
		s->condition = yyctext;
		s->s0 = s0, s->s1 = s1;
		printf("Got a map.\n");
		return s->symbol = MAP, 1;
	}
	<command> * { return 0; }
	*/
}