Fleshed out [source:]

This commit is contained in:
Neil 2022-02-13 20:45:38 -08:00
parent 4ad9a13c73
commit b8234e5eb7
4 changed files with 136 additions and 40 deletions

View File

@ -1,4 +1,3 @@
#include "interpret.h"
#include "lex.h"
#include <unistd.h> /* chdir (POSIX) */
#include <sys/types.h> /* mode_t (POSIX) */
@ -24,11 +23,45 @@ static int int_cmp(const int *const a, const int *const b)
static int void_int_cmp(const void *const a, const void *const b)
{ return int_cmp(a, b); }
#define ARRAY_NAME char
#define ARRAY_TYPE char
#include "array.h"
/** Appends the text file named `fn` to `c`, followed by a terminating '\0'.
 On any failure, `c` is restored to the size it had on entry, so a partial
 read is never left behind in the array.
 @param[c] The array that receives the file's contents; must be non-null.
 @param[fn] The path of the file to read; must be non-null.
 @return Success. A partial read is failure. @throws[fopen, fread, malloc]
 @throws[EILSEQ] The text file has embedded nulls. Also used as a fallback
 when a failure occurred but the standard library did not set `errno`, (which
 will not happen on POSIX.) */
static int append_file(struct char_array *c, const char *const fn) {
	FILE *fp = 0;
	const size_t granularity = 1024;
	size_t nread, start;
	char *cursor;
	int success = 0;
	assert(c && fn);
	start = c->size; /* Remember where to roll back to on failure. */
	if(!(fp = fopen(fn, "r"))) goto catch;
	/* Read entire file in chunks. A chunk shorter than `granularity` ends
	 the loop; read errors are distinguished from end-of-file by `ferror`. */
	do if(!(cursor = char_array_buffer(c, granularity))
		|| (nread = fread(cursor, 1, granularity, fp), ferror(fp))
		|| !char_array_append(c, nread)) goto catch;
	while(nread == granularity);
	/* File to `C` string. */
	if(!(cursor = char_array_new(c))) goto catch;
	*cursor = '\0';
	/* Binary files with embedded '\0' are not allowed: the first '\0' found
	 must be the terminator that was just written. */
	if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; }
	{ success = 1; goto finally; }
catch:
	if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */
finally:
	if(fp && fclose(fp)) success = 0;
	/* Discard anything appended by a failed call. */
	if(!success) c->size = start;
	return success;
}
int main(int argc, char **argv) {
int success = EXIT_FAILURE;
DIR *dir = 0;
struct dirent *de;
struct int_array years = ARRAY_IDLE, months = ARRAY_IDLE, days = ARRAY_IDLE;
struct char_array entry = ARRAY_IDLE;
int *y, *y_end;
/* Get the years list as directories matching a year in order. */
@ -39,7 +72,7 @@ int main(int argc, char **argv) {
while((de = readdir(dir))) {
struct stat st;
int year, *p;
if(!looks_like_year(de->d_name, &year)) continue;
if(!lex_looks_like_year(de->d_name, &year)) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&years))) goto catch;
@ -60,7 +93,7 @@ int main(int argc, char **argv) {
while((de = readdir(dir))) {
struct stat st;
int month, *p;
if(!(month = looks_like_month(de->d_name))) continue;
if(!(month = lex_looks_like_month(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&months))) goto catch;
@ -81,7 +114,7 @@ int main(int argc, char **argv) {
struct stat st;
int day, *p;
/* fixme: Have yyyy-mm-dd to figure out how many days. */
if(!(day = looks_like_day(de->d_name))) continue;
if(!(day = lex_looks_like_day(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
if(S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&days))) goto catch;
@ -92,21 +125,41 @@ int main(int argc, char **argv) {
fprintf(stderr, "%s: %s.\n", temp, int_array_to_string(&days));
for(d = days.data, d_end = d + days.size; d < d_end; d++) {
struct lex lex;
printf("%d-%.2d-%.2d\n", *y, *m, *d);
sprintf(temp, "%.2d.txt", *d);
if(!append_file(&entry, temp)) goto catch;
printf("%s", entry.data);
printf("Lexing:\n");
lex_reset(entry.data);
while(lex_next(&lex)) {
printf("%lu: %s",
(unsigned long)lex.line, lex_symbols[lex.symbol]);
if(lex.symbol == TEXT) {
if(lex.s0 + INT_MAX < lex.s1)
{ errno = EILSEQ; goto catch; }
printf(" %.*s", (int)(lex.s1 - lex.s0), lex.s0);
}
printf(".\n");
}
char_array_clear(&entry);
break; /* fixme */
}
int_array_clear(&days);
if(chdir("..") == -1) goto catch;
break; /* fixme */
}
int_array_clear(&months);
if(chdir("..") == -1) goto catch;
break; /* fixme */
}
{ success = EXIT_SUCCESS; goto finally; }
catch:
perror("interpret");
finally:
if(dir) closedir(dir);
if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir");
int_array_(&years);
int_array_(&months);
return EXIT_FAILURE;

View File

@ -1 +0,0 @@

View File

@ -1,6 +1,24 @@
int looks_like_year(const char *, int *);
int looks_like_month(const char *);
int looks_like_day(const char *);
#include <stddef.h>
struct scanner;
int lex(struct scanner *const s);
int lex_looks_like_year(const char *, int *);
int lex_looks_like_month(const char *);
int lex_looks_like_day(const char *);
/* X-macro listing every symbol the lexer can report. It is expanded twice
 below with different definitions of `X`: once to produce the enumerators of
 `enum lex_symbol`, and once to produce their names as strings, so the two
 always stay in the same order. */
#define LEX_SYMBOL X(END), X(ERROR), X(TEXT), X(BANG), X(BRACKET), X(WHITE), \
X(MAP), \
/* Commands */ X(DEFAULT), X(SOURCE)
/* First expansion: each entry becomes the bare identifier. */
#define X(n) n
/** One lexeme, filled in by <fn:lex_next>. */
struct lex {
/* Which kind of lexeme this is. */
enum lex_symbol { LEX_SYMBOL } symbol;
/* Flags set by the lexer while it skips whitespace and newlines ahead of
 the lexeme. */
int ws_before, new_paragraph;
/* Delimit the matched text, where the rule captures it: `s0` is the start
 and `s1` is the end; both are null if the rule captured nothing. */
const char *s0, *s1;
/* Line number in the buffer; counting starts at 1. */
size_t line;
};
#undef X
/* Second expansion: each entry becomes its name as a string literal. */
#define X(n) #n
/* Printable names, indexed by `enum lex_symbol`. Being `static` in a header,
 every translation unit that includes it gets its own copy. */
static const char *const lex_symbols[] = { LEX_SYMBOL };
#undef X
void lex_reset(const char *const buffer);
int lex_next(struct lex *);

View File

@ -17,7 +17,7 @@ re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
*/
int looks_like_year(const char *const a, int *const year) {
int lex_looks_like_year(const char *const a, int *const year) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a && year);
@ -37,7 +37,7 @@ int looks_like_year(const char *const a, int *const year) {
*/
}
int looks_like_month(const char *const a) {
int lex_looks_like_month(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
@ -50,7 +50,7 @@ int looks_like_month(const char *const a) {
*/
}
int looks_like_day(const char *const a) {
int lex_looks_like_day(const char *const a) {
const char *YYCURSOR = a, *YYMARKER = a, *s0;
/*!stags:re2c format = 'const char *@@;\n'; */
assert(a);
@ -65,30 +65,35 @@ int looks_like_day(const char *const a) {
/* This defines `enum condition`. */
/*!types:re2c*/
enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP };
/** scanner reads a file and extracts semantic information. Valid to access
only while underlying pointers do not change. */
struct scanner {
static struct scan {
/* `re2c` variables; these point directly into `buffer`. */
const char *marker, *ctx_marker, *from, *cursor;
/* Weird `re2c` stuff: these fields have to come after when >5? */
const char *label, *buffer, *s0, *s1;
const char *label, *buffer;
enum condition condition;
enum symbol symbol;
size_t line;
int ws_before;
};
int is_ws_expected;
} scan; /* Terrible, gah. */
/** Points the file-wide scanner at the start of `buffer` and restores it to
 its initial state, so that the next call to <fn:lex_next> starts lexing
 `buffer` from line 1.
 @param[buffer] The text to lex; end-of-input is marked by '\0'. The scanner
 keeps pointers into it, so it must stay valid and unchanged while lexing. If
 null, <fn:lex_next> returns 0 without lexing. */
void lex_reset(const char *const buffer) {
	scan.marker = scan.ctx_marker = scan.from = scan.cursor = scan.label
		= scan.buffer = buffer;
	scan.condition = 0;
	scan.line = 1;
	/* Otherwise a command left unfinished in the previous buffer would make
	 the first command of this buffer be rejected. */
	scan.is_ws_expected = 0;
}
/*!re2c
re2c:flags:tags = 1;
re2c:define:YYCURSOR = s->cursor;
re2c:define:YYMARKER = s->marker;
re2c:define:YYCTXMARKER = s->ctx_marker;
re2c:define:YYCURSOR = scan.cursor;
re2c:define:YYMARKER = scan.marker;
re2c:define:YYCTXMARKER = scan.ctx_marker;
re2c:define:YYCONDTYPE = 'condition';
re2c:define:YYGETCONDITION = 's->condition';
re2c:define:YYGETCONDITION = 'scan.condition';
re2c:define:YYGETCONDITION:naked = 1;
re2c:define:YYSETCONDITION = 's->condition = @@;';
re2c:define:YYSETCONDITION = 'scan.condition = @@;';
re2c:define:YYSETCONDITION:naked = 1;
// Eof is marked by null when preparing files for lexing.
@ -105,31 +110,52 @@ number = ([1-9][0-9]* | [0])? "." [0-9]+ | [1-9][0-9]* | [0];
id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63};
*/
int lex(struct scanner *const s) {
int lex_next(struct lex *const x) {
const char *s0, *s1;
const size_t prev_line = scan.line;
/*!stags:re2c format = 'const char *@@;\n'; */
s->ws_before = 0;
assert(x);
if(!scan.buffer) return 0;
x->s0 = x->s1 = 0;
x->line = prev_line;
x->ws_before = 0;
x->new_paragraph = 0;
scan:
/*!re2c
<text> end { return s->symbol = END, 1; }
<text> end { printf("end\n");return x->symbol = END, 0; }
<image, command, command_args> end { return x->symbol = ERROR, 0; }
// fixme: paragraphs.
<text> newline { s->line++; s->ws_before = 1; goto scan; }
<text> ws+ { s->ws_before = 1; goto scan; }
<text> @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1;
return s->symbol = TEXT, 1; }
<text> @s0 "!" @s1 { s->s0 = s0, s->s1 = s1;
return s->symbol = BANG, 1; }
<text> "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1;
return s->symbol = BRACKET, 1; }
<text> newline
{ x->line = ++scan.line;
x->new_paragraph = prev_line + 2 >= scan.line;
x->ws_before = 1;
goto scan; }
<text> ws+ { x->ws_before = 1; goto scan; }
<text> @s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1;
return x->symbol = TEXT, 1; }
<text> @s0 "!" @s1 { x->s0 = s0, x->s1 = s1;
return x->symbol = BANG, 1; }
<text> "\\" @s0 "[" @s1 { x->s0 = s0, x->s1 = s1;
return x->symbol = BRACKET, 1; }
<text> "![" :=> image
<text> "[" :=> command
<image> * { return 0; }
<image> * { printf("image(broken)\n");return 0; }
<image> ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" {
s->condition = yyctext;
s->s0 = s0, s->s1 = s1;
scan.condition = yyctext;
x->s0 = s0, x->s1 = s1;
printf("Got a map.\n");
return 1;
}
<command> * { return 0; }
<command> ws+ { scan.is_ws_expected = 0; goto scan; }
<command> "source"
{ if(scan.is_ws_expected) return x->symbol = ERROR, 0;
return scan.is_ws_expected = 1, x->symbol = SOURCE, 1; }
<command> "default"
{ if(scan.is_ws_expected) return 0;
return scan.is_ws_expected = 1, x->symbol = DEFAULT, 1; }
<command> ":" :=> command_args
<command, command_args> * { return x->symbol = ERROR, 0; }
<command_args> [^\n\r\]\x00]+ { printf("Command args fixme\n"); goto scan; }
<command_args> "]" :=> text
*/
}