interpret/src/interpret.c

379 lines
11 KiB
C
Raw Normal View History

2022-04-01 02:09:02 -04:00
/** @std GNU C11 */
2022-02-13 01:35:02 -05:00
#include "lex.h"
#include <unistd.h> /* chdir (POSIX) */
#include <sys/types.h> /* mode_t (POSIX) */
#include <sys/stat.h> /* umask (POSIX) */
#include <dirent.h> /* opendir readdir closedir */
2022-02-10 23:30:11 -05:00
#include <limits.h>
2022-02-13 01:35:02 -05:00
#include <stdio.h>
#include <stdlib.h>
2022-04-01 02:09:02 -04:00
#include <stdint.h>
2022-02-10 23:30:11 -05:00
2022-02-11 04:46:41 -05:00
/* `int_to_string` writes into 12 bytes: one sign, at most ten digits, and the
 terminating null. Refuse to compile where an `int` could need more digits. */
#if INT_MAX >= 10000000000
#error int_to_string requires truncation on this compiler.
#endif
/** Writes the decimal representation of `*n` into the 12-byte buffer `*a`,
 always null-terminated. Supplied to the array template as `ARRAY_TO_STRING`.
 @param[n] The integer to print; must not be null.
 @param[a] The destination buffer; must not be null. */
static void int_to_string(const int *const n, char (*const a)[12])
	{ snprintf(*a, sizeof *a, "%d", *n); }
2022-02-11 04:46:41 -05:00
#define ARRAY_NAME int
#define ARRAY_TYPE int
#define ARRAY_EXPECT_TRAIT
#include "array.h"
2022-02-11 04:46:41 -05:00
#define ARRAY_TO_STRING &int_to_string
#include "array.h"
2022-02-11 04:46:41 -05:00
/** Three-way comparison of the integers at `a` and `b`.
 @return -1 if `*a` is less than `*b`, 1 if it is greater, and 0 if equal. */
static int int_cmp(const int *const a, const int *const b) {
	if(*a < *b) return -1;
	if(*a > *b) return 1;
	return 0;
}
/** Adaptor giving <fn:int_cmp> the untyped signature that `qsort` expects;
 `a` and `b` must point to `int`. */
static int void_int_cmp(const void *const a, const void *const b) {
	const int *const left = a, *const right = b;
	return int_cmp(left, right);
}
2022-02-10 23:30:11 -05:00
2022-02-13 23:45:38 -05:00
#define ARRAY_NAME char
#define ARRAY_TYPE char
#include "array.h"
/** Appends the contents of the text file named `fn` to `c`, then adds a
 terminating '\0'. The file is opened in text mode and read in fixed-size
 chunks.
 @param[c] The character array that receives the text; must not be null.
 @param[fn] The file name to open; must not be null.
 @return Success. A partial read is failure, as is a failing `fclose`.
 @throws[fopen, fread, malloc]
 @throws[EILSEQ] The first '\0' found from the start of `c`'s data is not the
 terminator just written, that is, the text has embedded nulls. It is also the
 fallback when a failing call has left `errno` at zero. */
static int append_file(struct char_array *c, const char *const fn) {
	const size_t chunk = 1024;
	FILE *in = 0;
	char *at;
	size_t got;
	int ok = 0;
	assert(c && fn);
	in = fopen(fn, "r");
	if(!in) goto catch;
	/* Read the entire file; a short chunk means that was the last one. */
	for( ; ; ) {
		at = char_array_buffer(c, chunk);
		if(!at) goto catch;
		got = fread(at, 1, chunk, in);
		if(ferror(in)) goto catch;
		if(!char_array_append(c, got)) goto catch;
		if(got != chunk) break;
	}
	/* Turn the accumulated text into a `C` string. */
	at = char_array_new(c);
	if(!at) goto catch;
	*at = '\0';
	/* Binary files with embedded '\0' are not allowed. */
	if(strchr(c->data, '\0') != at) { errno = EILSEQ; goto catch; }
	ok = 1;
	goto finally;
catch:
	/* Guarantee that a failure is always accompanied by a non-zero `errno`. */
	if(!errno) errno = EILSEQ;
finally:
	if(in && fclose(in)) ok = 0;
	return ok;
}
/** Gregorian leap-year test.
 @param[y] The year; asserted to be at least 1582.
 @return One if `y` is a leap year, zero otherwise. */
static int leap(int y) {
	assert(y >= 1582);
	/* Every fourth year, except centuries, unless divisible by 400. */
	return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
}
2022-07-06 13:02:28 -04:00
/** A calendar date packed into 32 bits, readable either as one integer,
 `u32`, or as separate `day`, `month` and `year` bit-fields.
 Assumes: reverse ordering of byte-fields, so that comparing `u32` orders by
 year, then month, then day; `unsigned` is at least 32 bits; C11 and GNU
 anonymous unions. */
union date32 {
	uint32_t u32;
	struct {
		unsigned day : 5;
		unsigned month : 4;
		unsigned year : 23;
	};
};
2022-05-28 02:09:38 -04:00
/** Ordering predicate given to the page tree as `TREE_COMPARE`; it compares
 the packed 32-bit representations.
 @return Non-zero if `a.u32` is strictly greater than `b.u32`. */
static int date_mixup(union date32 a, union date32 b) {
	const uint32_t left = a.u32, right = b.u32;
	return left > right;
}
/** Writes `d` as "year-mm-dd" into the 12-byte buffer `z`, always
 null-terminated. The year is printed modulo 10000 so the result fits.
 @param[d] Asserted to have a year below 10000, a month in [1, 12], and a day
 in [1, 31].
 @param[z] The destination buffer; must not be null. */
static void date32_to_string(const union date32 d, char (*const z)[12]) {
	assert(d.year < 10000 && d.month && d.month <= 12 && d.day && d.day <= 31);
	/* Bit-fields undergo integer promotion; cast so that they match `%u`. */
	snprintf(*z, sizeof *z, "%u-%2.2u-%2.2u",
		(unsigned)d.year % 10000, (unsigned)d.month, (unsigned)d.day);
}
/** Convert or narrower type or return zero. */
2022-04-01 02:09:02 -04:00
static union date32 date_to_32(const int y, const int m, const int d) {
union date32 d32 = { 0 };
/* Leap year calculations only work at y>=1 and Gregorian Calendar and max
23 bits. */
2022-07-06 13:02:28 -04:00
if(y < 1582 || y > 8388607 || m < 1 || m > 12 || d < 1 || d > 31) goto no;
switch(m) {
case 1: case 3: case 5: case 7: case 8: case 10: case 12: break;
2022-07-05 22:21:56 -04:00
case 4: case 6: case 9: case 11: if(d > 30) goto no; break;
case 2: if(d > 28 + leap(y)) goto no; break;
default: assert(0); break;
}
d32.year = (unsigned)y, d32.month = (unsigned)m, d32.day = (unsigned)d;
2022-07-05 22:21:56 -04:00
no:
return d32;
}
/** Tomohiko Sakamoto comp.lang.c 1993-04-10. */
2022-04-01 02:09:02 -04:00
static unsigned weekday(union date32 d) {
d.year -= d.month < 3;
return (d.year + d.year / 4 - d.year / 100 + d.year / 400
+ "-bed=pen+mad."[d.month] + d.day) % 7;
}
2022-05-28 02:09:38 -04:00
2022-07-06 13:02:28 -04:00
/* Contained in <lex.h> to share with <lex.re_c.c>. */
#define ARRAY_NAME lex
#define ARRAY_TYPE struct lex
#include "array.h"
2022-05-28 02:09:38 -04:00
struct page_tree_entry_c;
static void entry_to_string(struct page_tree_entry_c, char (*)[12]);
/* One journal entry, stored as the value in the page tree under its date. */
struct page {
/* Raw text of the entry's file; `main` fills this with `append_file`. */
struct char_array entry;
/* Lexemes that `main` obtains by calling `lex_next` over `entry`'s text. */
struct lex_array lexx;
};
2022-05-28 02:09:38 -04:00
#define TREE_NAME page
#define TREE_KEY union date32
#define TREE_VALUE struct page
#define TREE_COMPARE &date_mixup
2022-05-28 02:15:21 -04:00
#define TREE_EXPECT_TRAIT
#include "tree.h"
#define TREE_TO_STRING &entry_to_string
2022-05-28 02:09:38 -04:00
#include "tree.h"
static void entry_to_string(const struct page_tree_entry_c entry,
char (*const z)[12]) { date32_to_string(*entry.key, z); }
/* A pair of strings, `key` and `desc`. Nothing in the visible part of this
 file uses it; presumably a source's identifier and its description — TODO
 confirm against wherever it is used. */
struct source { char *key, *desc; };
2022-07-06 13:02:28 -04:00
/*
### plot with steps
reset session
$Data <<EOD
1,1,0
1,2,0
1,3,0
1,4,2
1,5,1
1,6,3
1,7,3
1,8,1
1,9,3
1,10,8
1,11,1
1,12,0
1,13,3
EOD
set title "Cumulative count" font ",16"
set xlabel "episode"
set ylabel "cumulative count"
set xtics 1
set key bottom right
set grid
unset border
set datafile separator comma
plot $Data u 2:($3) smooth cumulative with steps lw 2 lc "red" ti "cumulative count"
### end of code
*/
/** Walks every page of `journal` in iteration order, and every lexeme of each
 page, running a small state machine over the `BIBLE_*` symbols. The date and
 book go to `stderr`; chapter-verse and the text go to `stdout`. Each
 `BIBLE_TEXT` lexeme is counted and the total is printed at the end.
 @param[journal] The tree of pages; it is only iterated here.
 @return One on success. Zero if a symbol arrives in a state that does not
 allow it, after reporting the date (and the line, if a lexeme is current) on
 `stderr`.
 @throws[EILSEQ] A symbol was out of sequence. */
static int bible_graph(/*const*/ struct page_tree *const journal) {
/* NOTE(review): `NEXT` is tested under `BIBLE_TEXT` but never assigned
 anywhere in this function; `BIBLE_NEXT` leaves the state at `WORD`. Confirm
 whether `state = NEXT` was intended there. */
enum { CHILL, BOOK, CHAPTER, WORD, NEXT } state = CHILL;
struct page_tree_entry entry = { 0, 0 };
struct lex *lex = 0;
size_t count = 0;
for(struct page_tree_iterator p_it = page_tree_iterator(journal);
(entry = page_tree_next(&p_it)).key; ) {
struct page *const page = entry.value;
for(struct lex_array_iterator l_it = lex_array_iterator(&page->lexx);
(lex = lex_array_next(&l_it)); ) {
switch(lex->symbol) {
case BIBLE_BOOK:
/* A book may start a reference or follow the text of the previous one. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n");
fprintf(stderr, "%d-%.2d-%.2d: %.*s ",
entry.key->year, entry.key->month, entry.key->day,
(int)(lex->s1 - lex->s0), lex->s0);
state = BOOK;
break;
case BIBLE_CHAPTER_VERSE:
/* Only valid directly after a book; opens the quoted text. */
if(state != BOOK) goto catch;
printf("%.*s -- \"", (int)(lex->s1 - lex->s0), lex->s0);
state = CHAPTER;
break;
case BIBLE_NEXT:
/* Closes the quote; the state is left as `WORD`. */
if(state != WORD) goto catch;
printf("\"\n");
break;
case BIBLE_TEXT:
if(state != WORD && state != CHAPTER && state != NEXT)
goto catch;
/* A "*" is put in front of text that follows while in state `WORD`. */
printf("%s%.*s", state == WORD ? "*" : "",
(int)(lex->s1 - lex->s0), lex->s0);
count++;
state = WORD;
break;
default:
/* Any other symbol ends a reference in progress, if it has had text. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\"\n"), state = CHILL;
break;
}
}
/* A page must not end part-way through a reference. `lex` is null here,
 because that is what ended the loop above. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n"), state = CHILL;
}
printf("Count: %lu.\n", (unsigned long)count);
return 1;
catch:
fprintf(stderr, "Bible error.\n");
/* `entry.key` is null if no page has been reached; `lex` is null if the
 failure was found at the end of a page. */
if(entry.key) {
fprintf(stderr, "On date: %d-%.2d-%.2d.\n",
entry.key->year, entry.key->month, entry.key->day);
if(lex) fprintf(stderr, "At line %lu.\n", (unsigned long)lex->line);
}
errno = EILSEQ;
return 0;
}
2022-02-10 23:30:11 -05:00
int main(int argc, char **argv) {
2022-05-28 02:09:38 -04:00
int success = EXIT_SUCCESS;
2022-07-05 23:48:40 -04:00
char *intent = 0;
2022-05-24 22:47:13 -04:00
/* For reading in files, overwritten. */
2022-02-13 01:17:45 -05:00
DIR *dir = 0;
struct dirent *de;
2022-05-28 02:09:38 -04:00
struct int_array years = int_array(), months = int_array(),
days = int_array();
2022-02-13 01:17:45 -05:00
int *y, *y_end;
2022-02-11 03:28:28 -05:00
2022-05-28 02:09:38 -04:00
struct page_tree journal = page_tree();
2022-05-24 22:47:13 -04:00
2022-02-11 03:28:28 -05:00
errno = 0;
2022-07-05 23:48:40 -04:00
if(argc != 2) { intent = "needs journal location, which should"
" contain <year>/<month>/<day>.txt"; goto catch; }
2022-05-24 22:47:13 -04:00
/* Get the years list as directories matching a year. */
2022-02-13 01:17:45 -05:00
if(chdir(argv[1]) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
2022-02-11 03:28:28 -05:00
struct stat st;
2022-02-11 04:46:41 -05:00
int year, *p;
2022-02-13 23:45:38 -05:00
if(!lex_looks_like_year(de->d_name, &year)) continue;
2022-02-13 01:17:45 -05:00
if(stat(de->d_name, &st)) goto catch;
2022-02-11 04:46:41 -05:00
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&years))) goto catch;
*p = year;
2022-02-11 03:28:28 -05:00
}
2022-02-13 01:17:45 -05:00
closedir(dir), dir = 0;
2022-04-01 02:09:02 -04:00
/* Sort the years for sensible ordering of parsing. */
2022-02-11 04:46:41 -05:00
qsort(years.data, years.size, sizeof *years.data, &void_int_cmp);
2022-07-06 13:02:28 -04:00
fprintf(stderr, "(In %s: %s.)\n", argv[1], int_array_to_string(&years));
2022-02-13 01:17:45 -05:00
/* Go though each year. */
for(y = years.data, y_end = y + years.size; y < y_end; y++) {
char fn[64];
2022-02-13 01:17:45 -05:00
int *m, *m_end;
sprintf(fn, "%d", *y);
2022-02-13 01:17:45 -05:00
/* Get the months as directories. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
2022-02-13 01:17:45 -05:00
while((de = readdir(dir))) {
struct stat st;
int month, *p;
2022-02-13 23:45:38 -05:00
if(!(month = lex_looks_like_month(de->d_name))) continue;
2022-02-13 01:17:45 -05:00
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&months))) goto catch;
*p = month;
}
closedir(dir), dir = 0;
qsort(months.data, months.size, sizeof *months.data, &void_int_cmp);
2022-07-06 13:02:28 -04:00
fprintf(stderr, "(In %s: %s.)\n", fn, int_array_to_string(&months));
2022-02-13 01:17:45 -05:00
/* Go though each month. */
for(m = months.data, m_end = m + months.size; m < m_end; m++) {
int *d, *d_end;
sprintf(fn, "%.2d", *m);
2022-02-13 01:17:45 -05:00
/* Get the days as files. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
2022-02-13 01:17:45 -05:00
while((de = readdir(dir))) {
struct stat st;
int day, *p;
/* fixme: Have yyyy-mm-dd to figure out how many days. */
2022-02-13 23:45:38 -05:00
if(!(day = lex_looks_like_day(de->d_name))) continue;
2022-02-13 01:17:45 -05:00
if(stat(de->d_name, &st)) goto catch;
if(S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&days))) goto catch;
*p = day;
}
closedir(dir), dir = 0;
qsort(days.data, days.size, sizeof *days.data, &void_int_cmp);
2022-07-06 13:02:28 -04:00
fprintf(stderr, "(In %s: %s.)\n", fn, int_array_to_string(&days));
2022-02-13 01:17:45 -05:00
for(d = days.data, d_end = d + days.size; d < d_end; d++) {
2022-07-05 23:48:40 -04:00
struct lex *lex = 0;
2022-05-28 02:09:38 -04:00
struct page *page = 0;
2022-07-05 23:48:40 -04:00
union date32 d32;
if(!(d32 = date_to_32(*y, *m, *d)).year) { errno = EILSEQ;
intent = "date parse error"; goto syntax; }
sprintf(fn, "%.2d.txt", *d);
2022-05-28 02:09:38 -04:00
if(page_tree_bulk_add(&journal, d32, &page) != TREE_UNIQUE) {
2022-07-05 23:48:40 -04:00
if(!errno) intent = "not unique", errno = EDOM;
goto syntax;
2022-05-28 02:09:38 -04:00
}
page->entry = char_array();
page->lexx = lex_array();
2022-07-05 23:48:40 -04:00
if(!append_file(&page->entry, fn)) goto syntax;
for(lex_reset(page->entry.data); ; ) {
2022-07-05 23:48:40 -04:00
if(!(lex = lex_array_new(&page->lexx))) goto syntax;
if(!lex_next(lex)) {
2022-07-05 23:48:40 -04:00
if(lex->symbol != END) { errno = EILSEQ; goto syntax; }
break;
}
2022-07-06 17:59:56 -04:00
if(lex->symbol == BIBLE_BOOK
|| lex->symbol == BIBLE_CHAPTER_VERSE)
printf("[%.*s]\n", (int)(lex->s1 - lex->s0), lex->s0);
2022-02-13 23:45:38 -05:00
}
2022-07-05 23:48:40 -04:00
continue;
syntax:
fprintf(stderr, "On date: %d-%.2d-%.2d.\n", *y, *m, *d);
if(!page) goto catch;
printf("While parsing <<<\n%s>>>\n", page->entry.data);
if(!lex) goto catch;
printf("On line %lu: %s",
(unsigned long)lex->line, lex_symbols[lex->symbol]);
if(lex->symbol == TEXT || lex->symbol == ARG_KEYWORD
|| lex->symbol == ARG_DATE
|| lex->symbol == ARG_FREEFORM
|| lex->symbol == CAPTION
|| lex->symbol == BIBLE_TEXT) {
if(lex->s0 + INT_MAX < lex->s1)
{ intent = "line too long"; errno = EILSEQ; }
printf(" <<%.*s>>", (int)(lex->s1 - lex->s0), lex->s0);
}
printf(".\n");
goto catch;
2022-02-13 01:17:45 -05:00
}
2022-02-10 23:30:11 -05:00
2022-02-13 01:17:45 -05:00
int_array_clear(&days);
if(chdir("..") == -1) goto catch;
}
int_array_clear(&months);
if(chdir("..") == -1) goto catch;
2022-07-06 13:02:28 -04:00
/* fixme: Expand, contact is the next thing that it doesn't get. */
if(*y == 1996) break;
2022-02-13 01:17:45 -05:00
}
2022-05-28 02:09:38 -04:00
page_tree_bulk_finish(&journal);
2022-07-05 23:48:40 -04:00
int_array_(&years), int_array_(&months), int_array_(&days);
2022-07-06 13:02:28 -04:00
fprintf(stderr, "Journal has entries: %s\n", page_tree_to_string(&journal));
/* Do something interesting? */
if(!bible_graph(&journal)) goto catch;
2022-05-28 02:09:38 -04:00
goto finally;
2022-02-10 23:30:11 -05:00
catch:
2022-05-28 02:09:38 -04:00
success = EXIT_FAILURE;
2022-02-10 23:30:11 -05:00
perror("interpret");
2022-07-05 23:48:40 -04:00
if(intent) fprintf(stderr, "Further explanation: %s.\n", intent);
2022-02-10 23:30:11 -05:00
finally:
2022-02-13 23:45:38 -05:00
if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir");
2022-07-05 23:48:40 -04:00
int_array_(&years), int_array_(&months), int_array_(&days);
2022-05-28 02:09:38 -04:00
struct page_tree_entry entry;
2022-07-06 13:02:28 -04:00
for(struct page_tree_iterator it = page_tree_iterator(&journal);
2022-05-28 02:09:38 -04:00
(entry = page_tree_next(&it)).key; ) {
struct page *const page = entry.value;
char z[12];
date32_to_string(*entry.key, &z);
2022-07-06 13:02:28 -04:00
/*printf("Freeing %s.\n", z);*/
2022-05-28 02:09:38 -04:00
lex_array_(&page->lexx);
char_array_(&page->entry);
}
return success;
2022-02-10 23:30:11 -05:00
}
2022-07-02 23:38:17 -04:00
/* Popular KJV 738137 words. */