diff --git a/src/interpret.c b/src/interpret.c index 882d042..9dbf690 100644 --- a/src/interpret.c +++ b/src/interpret.c @@ -1,4 +1,3 @@ -#include "interpret.h" #include "lex.h" #include /* chdir (POSIX) */ #include /* mode_t (POSIX) */ @@ -24,11 +23,45 @@ static int int_cmp(const int *const a, const int *const b) static int void_int_cmp(const void *const a, const void *const b) { return int_cmp(a, b); } +#define ARRAY_NAME char +#define ARRAY_TYPE char +#include "array.h" +/** Append a text file, `fn`, to `c`, and add a '\0'. + @return Success. A partial read is failure. @throws[fopen, fread, malloc] + @throws[EILSEQ] The text file has embedded nulls. + @throws[EILSEQ] Also set when a failure leaves `errno` unset, that is, if the standard library does not follow POSIX. */ +static int append_file(struct char_array *c, const char *const fn) { + FILE *fp = 0; + const size_t granularity = 1024; + size_t nread; + char *cursor; + int success = 0; + assert(c && fn); + if(!(fp = fopen(fn, "r"))) goto catch; + /* Read entire file in chunks. */ + do if(!(cursor = char_array_buffer(c, granularity)) + || (nread = fread(cursor, 1, granularity, fp), ferror(fp)) + || !char_array_append(c, nread)) goto catch; + while(nread == granularity); + /* File to `C` string. */ + if(!(cursor = char_array_new(c))) goto catch; + *cursor = '\0'; + /* Binary files with embedded '\0' are not allowed. */ + if(strchr(c->data, '\0') != cursor) { errno = EILSEQ; goto catch; } + { success = 1; goto finally; } +catch: + if(!errno) errno = EILSEQ; /* Will never be true on POSIX. */ +finally: + if(fp && fclose(fp)) success = 0; + return success; +} + int main(int argc, char **argv) { int success = EXIT_FAILURE; DIR *dir = 0; struct dirent *de; struct int_array years = ARRAY_IDLE, months = ARRAY_IDLE, days = ARRAY_IDLE; + struct char_array entry = ARRAY_IDLE; int *y, *y_end; /* Get the years list as directories matching a year in order. 
*/ @@ -39,7 +72,7 @@ int main(int argc, char **argv) { while((de = readdir(dir))) { struct stat st; int year, *p; - if(!looks_like_year(de->d_name, &year)) continue; + if(!lex_looks_like_year(de->d_name, &year)) continue; if(stat(de->d_name, &st)) goto catch; if(!S_ISDIR(st.st_mode)) continue; if(!(p = int_array_new(&years))) goto catch; @@ -60,7 +93,7 @@ int main(int argc, char **argv) { while((de = readdir(dir))) { struct stat st; int month, *p; - if(!(month = looks_like_month(de->d_name))) continue; + if(!(month = lex_looks_like_month(de->d_name))) continue; if(stat(de->d_name, &st)) goto catch; if(!S_ISDIR(st.st_mode)) continue; if(!(p = int_array_new(&months))) goto catch; @@ -81,7 +114,7 @@ int main(int argc, char **argv) { struct stat st; int day, *p; /* fixme: Have yyyy-mm-dd to figure out how many days. */ - if(!(day = looks_like_day(de->d_name))) continue; + if(!(day = lex_looks_like_day(de->d_name))) continue; if(stat(de->d_name, &st)) goto catch; if(S_ISDIR(st.st_mode)) continue; if(!(p = int_array_new(&days))) goto catch; @@ -92,21 +125,41 @@ int main(int argc, char **argv) { fprintf(stderr, "%s: %s.\n", temp, int_array_to_string(&days)); for(d = days.data, d_end = d + days.size; d < d_end; d++) { + struct lex lex; printf("%d-%.2d-%.2d\n", *y, *m, *d); + sprintf(temp, "%.2d.txt", *d); + if(!append_file(&entry, temp)) goto catch; + printf("%s", entry.data); + printf("Lexing:\n"); + lex_reset(entry.data); + while(lex_next(&lex)) { + printf("%lu: %s", + (unsigned long)lex.line, lex_symbols[lex.symbol]); + if(lex.symbol == TEXT) { + if(lex.s0 + INT_MAX < lex.s1) + { errno = EILSEQ; goto catch; } + printf(" %.*s", (int)(lex.s1 - lex.s0), lex.s0); + } + printf(".\n"); + } + char_array_clear(&entry); + break; /* fixme */ } int_array_clear(&days); if(chdir("..") == -1) goto catch; + break; /* fixme */ } int_array_clear(&months); if(chdir("..") == -1) goto catch; + break; /* fixme */ } { success = EXIT_SUCCESS; goto finally; } catch: perror("interpret"); 
finally: - if(dir) closedir(dir); + if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir"); int_array_(&years); int_array_(&months); return EXIT_FAILURE; diff --git a/src/interpret.h b/src/interpret.h deleted file mode 100644 index 8b13789..0000000 --- a/src/interpret.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/lex.h b/src/lex.h index 79273d2..bac7278 100644 --- a/src/lex.h +++ b/src/lex.h @@ -1,6 +1,24 @@ -int looks_like_year(const char *, int *); -int looks_like_month(const char *); -int looks_like_day(const char *); +#include -struct scanner; -int lex(struct scanner *const s); +int lex_looks_like_year(const char *, int *); +int lex_looks_like_month(const char *); +int lex_looks_like_day(const char *); + +#define LEX_SYMBOL X(END), X(ERROR), X(TEXT), X(BANG), X(BRACKET), X(WHITE), \ + X(MAP), \ + /* Commands */ X(DEFAULT), X(SOURCE) + +#define X(n) n +struct lex { + enum lex_symbol { LEX_SYMBOL } symbol; + int ws_before, new_paragraph; + const char *s0, *s1; + size_t line; +}; +#undef X +#define X(n) #n +static const char *const lex_symbols[] = { LEX_SYMBOL }; +#undef X + +void lex_reset(const char *const buffer); +int lex_next(struct lex *); diff --git a/src/lex.re_c.c b/src/lex.re_c.c index 57f76d1..f84421c 100644 --- a/src/lex.re_c.c +++ b/src/lex.re_c.c @@ -17,7 +17,7 @@ re2c:yyfill:enable = 0; re2c:define:YYCTYPE = char; */ -int looks_like_year(const char *const a, int *const year) { +int lex_looks_like_year(const char *const a, int *const year) { const char *YYCURSOR = a, *YYMARKER = a, *s0; /*!stags:re2c format = 'const char *@@;\n'; */ assert(a && year); @@ -37,7 +37,7 @@ int looks_like_year(const char *const a, int *const year) { */ } -int looks_like_month(const char *const a) { +int lex_looks_like_month(const char *const a) { const char *YYCURSOR = a, *YYMARKER = a, *s0; /*!stags:re2c format = 'const char *@@;\n'; */ assert(a); @@ -50,7 +50,7 @@ int looks_like_month(const char *const a) { */ } -int looks_like_day(const char *const a) { +int 
lex_looks_like_day(const char *const a) { const char *YYCURSOR = a, *YYMARKER = a, *s0; /*!stags:re2c format = 'const char *@@;\n'; */ assert(a); @@ -65,30 +65,35 @@ int looks_like_day(const char *const a) { /* This defines `enum condition`. */ /*!types:re2c*/ -enum symbol { END, TEXT, BANG, BRACKET, WHITE, MAP }; /** scanner reads a file and extracts semantic information. Valid to access only while underlying pointers do not change. */ -struct scanner { +static struct scan { /* `re2c` variables; these point directly into `buffer`. */ const char *marker, *ctx_marker, *from, *cursor; /* Weird `c2re` stuff: these fields have to come after when >5? */ - const char *label, *buffer, *s0, *s1; + const char *label, *buffer; enum condition condition; - enum symbol symbol; size_t line; - int ws_before; -}; + int is_ws_expected; +} scan; /* Terrible, gah. */ + +void lex_reset(const char *const buffer) { + scan.marker = scan.ctx_marker = scan.from = scan.cursor = scan.label + = scan.buffer = buffer; + scan.condition = 0; + scan.line = 1; +} /*!re2c re2c:flags:tags = 1; -re2c:define:YYCURSOR = s->cursor; -re2c:define:YYMARKER = s->marker; -re2c:define:YYCTXMARKER = s->ctx_marker; +re2c:define:YYCURSOR = scan.cursor; +re2c:define:YYMARKER = scan.marker; +re2c:define:YYCTXMARKER = scan.ctx_marker; re2c:define:YYCONDTYPE = 'condition'; -re2c:define:YYGETCONDITION = 's->condition'; +re2c:define:YYGETCONDITION = 'scan.condition'; re2c:define:YYGETCONDITION:naked = 1; -re2c:define:YYSETCONDITION = 's->condition = @@;'; +re2c:define:YYSETCONDITION = 'scan.condition = @@;'; re2c:define:YYSETCONDITION:naked = 1; // Eof is marked by null when preparing files for lexing. @@ -105,31 +110,52 @@ number = ([1-9][0-9]* | [0])? "." 
[0-9]+ | [1-9][0-9]* | [0]; id = [a-zA-Z_][a-zA-Z_\-0-9]{0,63}; */ -int lex(struct scanner *const s) { +int lex_next(struct lex *const x) { const char *s0, *s1; + const size_t prev_line = scan.line; /*!stags:re2c format = 'const char *@@;\n'; */ - s->ws_before = 0; + assert(x); + if(!scan.buffer) return 0; + x->s0 = x->s1 = 0; + x->line = prev_line; + x->ws_before = 0; + x->new_paragraph = 0; scan: /*!re2c - end { return s->symbol = END, 1; } + end { printf("end\n");return x->symbol = END, 0; } + end { return x->symbol = ERROR, 0; } // fixme: paragraphs. - newline { s->line++; s->ws_before = 1; goto scan; } - ws+ { s->ws_before = 1; goto scan; } - @s0 glyph+ @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = TEXT, 1; } - @s0 "!" @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = BANG, 1; } - "\\" @s0 "[" @s1 { s->s0 = s0, s->s1 = s1; - return s->symbol = BRACKET, 1; } + newline + { x->line = ++scan.line; + x->new_paragraph = prev_line + 2 >= scan.line; + x->ws_before = 1; + goto scan; } + ws+ { x->ws_before = 1; goto scan; } + @s0 glyph+ @s1 { x->s0 = s0, x->s1 = s1; + return x->symbol = TEXT, 1; } + @s0 "!" 
@s1 { x->s0 = s0, x->s1 = s1; + return x->symbol = BANG, 1; } + "\\" @s0 "[" @s1 { x->s0 = s0, x->s1 = s1; + return x->symbol = BRACKET, 1; } "![" :=> image "[" :=> command - * { return 0; } + * { printf("image(broken)\n");return 0; } ws* "osm" ws* "](geo:" @s0 number "," @s1 number ")" { - s->condition = yyctext; - s->s0 = s0, s->s1 = s1; + scan.condition = yyctext; + x->s0 = s0, x->s1 = s1; printf("Got a map.\n"); return 1; } - * { return 0; } + ws+ { scan.is_ws_expected = 0; goto scan; } + "source" + { if(scan.is_ws_expected) return x->symbol = ERROR, 0; + return scan.is_ws_expected = 1, x->symbol = SOURCE, 1; } + "default" + { if(scan.is_ws_expected) return 0; + return scan.is_ws_expected = 1, x->symbol = DEFAULT, 1; } + ":" :=> command_args + * { return x->symbol = ERROR, 0; } + [^\n\r\]\x00]+ { printf("Command args fixme\n"); goto scan; } + "]" :=> text */ }