interpret/src/scan.re.c

498 lines
18 KiB
C

/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Scan journal. */
#include "../src/journal.h"
#include "../src/scan.h"
#include <stdio.h>
#include <assert.h>
/* Tree mapping from date-line in the journal to indices in whatever array. */
/** Tree to-string callback: renders only the date of the key `line` into `a`;
 the mapped index `u` is not printed. */
static void linemap_to_string(const union line64 line, const size_t *const u,
	char (*const a)[12]) {
	(void)u; /* Unused; the signature is fixed by the tree template. */
	date32_to_string(line.date, a);
}
/** Orders keys by their combined 64-bit value.
 @return Non-zero when `a` is strictly greater than `b`, zero otherwise. */
static int linemap_compare(const union line64 a, const union line64 b) {
	const int a_is_greater = a.u64 > b.u64;
	return a_is_greater;
}
/* Instantiate the `linemap` tree from the `tree.h` template: keys are
 `union line64`, values are `size_t`. The functions used in this file,
 (`linemap_tree_bulk_assign`, `linemap_tree_less`, . . . ) come from here.
 NOTE(review): the value-less `TREE_COMPARE` and `TREE_TO_STRING` presumably
 select `linemap_compare` and `linemap_to_string` above by name, `TREE_DEFAULT`
 is presumably the value given when a key is missing, and `TREE_BODY`
 presumably emits the definitions in this translation unit -- confirm against
 `tree.h`. */
#define TREE_NAME linemap
#define TREE_KEY union line64
#define TREE_VALUE size_t
#define TREE_COMPARE
#define TREE_TO_STRING
#define TREE_DEFAULT 0
#define TREE_BODY
#include "../src/tree.h"
/* Array of sources. */
/** Array to-string callback: copies at most 11 characters of the name of `s`
 into `z` and null-terminates it. An unset, (null,) or empty name gives the
 empty string.
 @param[s] The source whose `name` pair is printed.
 @param[z] Receives the text; always terminated. */
static void source_to_string(const struct source *const s,
	char (*const z)[12]) {
	const char *const from = s->name.a;
	char *const to = *z;
	size_t length = 0, i;
	/* Clamp a length instead of forming `a + 11`: that pointer may lie past the
	 end of the text, or be an offset from null for the zeroed first entry,
	 and just computing it is undefined. */
	if(from && s->name.b > from) {
		length = (size_t)(s->name.b - from);
		if(length > sizeof *z - 1) length = sizeof *z - 1;
	}
	for(i = 0; i < length; i++) to[i] = from[i];
	to[length] = '\0';
}
/* Instantiate the `source` array of `struct source` from the `array.h`
 template; `source_array_new` used in this file comes from here.
 NOTE(review): the value-less `ARRAY_TO_STRING` presumably selects
 `source_to_string` above by name and `ARRAY_BODY` presumably emits the
 definitions in this translation unit -- confirm against `array.h`. */
#define ARRAY_NAME source
#define ARRAY_TYPE struct source
#define ARRAY_TO_STRING
#define ARRAY_BODY
#include "../src/array.h"
/* Array of scores. */
/** Array to-string callback: copies at most 11 characters of the name of `s`
 into `z` and null-terminates it. A score whose name has not been set, (it is
 null until the name field is scanned,) gives the empty string.
 @param[s] The score whose `name` pair is printed.
 @param[z] Receives the text; always terminated. */
static void score_to_string(const struct score *const s,
	char (*const z)[12]) {
	const char *const from = s->name.a;
	char *const to = *z;
	size_t length = 0, i;
	/* Clamp a length instead of forming `a + 11`: that pointer may lie past the
	 end of the text, or be an offset from null for an unnamed score, and just
	 computing it is undefined. */
	if(from && s->name.b > from) {
		length = (size_t)(s->name.b - from);
		if(length > sizeof *z - 1) length = sizeof *z - 1;
	}
	for(i = 0; i < length; i++) to[i] = from[i];
	to[length] = '\0';
}
/* Instantiate the `score` array of `struct score` from the `array.h`
 template; `score_array_new`, `score_array_to_string`, and `score_array_` used
 in this file come from here.
 NOTE(review): the value-less `ARRAY_TO_STRING` presumably selects
 `score_to_string` above by name and `ARRAY_BODY` presumably emits the
 definitions in this translation unit -- confirm against `array.h`. */
#define ARRAY_NAME score
#define ARRAY_TYPE struct score
#define ARRAY_TO_STRING
#define ARRAY_BODY
#include "../src/array.h"
/* Glider tree. */
/** Tree to-string callback: renders only the date of the key `line` into `a`;
 the glider record `g` is not printed. */
static void glider_to_string(const union line64 line, const struct glider *g,
	char (*const a)[12]) {
	(void)g; /* Unused; the signature is fixed by the tree template. */
	date32_to_string(line.date, a);
}
/** Orders keys by their combined 64-bit value.
 @return Non-zero when `a` is strictly greater than `b`, zero otherwise. */
static int glider_compare(const union line64 a, const union line64 b) {
	const int a_is_greater = a.u64 > b.u64;
	return a_is_greater;
}
/* Instantiate the `glider` tree from the `tree.h` template: keys are
 `union line64`, values are `struct glider`. The functions used in this file,
 (`glider_tree_bulk_assign`, `glider_tree_iterator`, . . . ) come from here.
 NOTE(review): the value-less `TREE_COMPARE` and `TREE_TO_STRING` presumably
 select `glider_compare` and `glider_to_string` above by name and `TREE_BODY`
 presumably emits the definitions in this translation unit -- confirm against
 `tree.h`. */
#define TREE_NAME glider
#define TREE_KEY union line64
#define TREE_VALUE struct glider
#define TREE_COMPARE
#define TREE_TO_STRING
#define TREE_BODY
#include "../src/tree.h"
/*!conditions:re2c*/
/** Scans the text of one journal day, recording in `scan` every source line,
 (`--`,) score line, (`::`,) and `[glider]` line found. Records are keyed by
 `date` together with the line number within `buffer`. The lexer is generated
 by `re2c` with start conditions: every pass of the loop matches one rule of
 the current `condition`, and every action must leave by `continue`, `goto`,
 or `return`, because nothing separates it from the generated code that
 follows.
 @param[scan] Receives the records; must be non-null.
 @param[date] The date of this day; must be non-zero.
 @param[buffer] Null-terminated text of the day. The records store pointers
 into it rather than copies.
 @return Success. On failure, a diagnostic with the date, line, and condition
 is printed to `stderr`; `errno` is set to `EILSEQ` if it was zero. */
static int scan_day(struct scan *const scan, union date32 date,
	const char *const buffer) {
	const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *s0, *s1, *t0, *t1;
	enum YYCONDTYPE condition = yycline;
	size_t line = 1;
	char datestr[12] = {0};
	const char *fail = "perhaps a bat?";
	struct score *new_score = 0; /* Non-null while a score's fields remain. */
	struct glider *new_glider = 0; /* Non-null while a glider's fields remain. */
	assert(scan && date.u32 && buffer);
	YYCURSOR = YYMARKER = yyt1 = buffer;
	/*!re2c /**/
		re2c:define:YYCTYPE = char;
		re2c:yyfill:enable = 0;
		re2c:define:YYGETCONDITION = "condition";
		re2c:define:YYSETCONDITION = "condition = @@;";
		re2c:define:YYGETCONDITION:naked = 1;
		re2c:define:YYSETCONDITION:naked = 1;
		ws = [ \t];
		glyph = [^\x00-\x20\x7f]; // [^\x00\n\t ] + all weird
		semitext = glyph \ ";";
		natural = [1-9][0-9]*;
		zero_natural = natural | "0";
		uint = [0-9]+;
		keyword = [A-Za-z0-9][A-Za-z0-9_-]*;
		date = natural "-" [0-1][0-9] "-" [0-3][0-9];
		minutes = [0-5][0-9];
		airport = [A-Z0-9]{4,4};
	*/
	for( ; ; ) {
	/*!re2c /**/
		<skip> [^\n\x00] { continue; } /* Default ignore. */
		<skip> "\x00" { fail = "no newline at end of file"; goto catch; }
		<line> "\x00" { return 1; } /* End of day. */
		<line, skip> "\n" => line { line++; continue; }
		<line> * :=> skip
		<line> "--" / [^-] :=> source
		<line> "::" / [^:] :=> score
		<line> "[glider]" :=> glider_type

		<source> * { fail = "source unrecognized"; goto catch; }
		/* Existing source: look the keyword up and mark this date-line with
		 its index. The new-source rule below jumps in at `also_source`. */
		<source> @s0 keyword @s1 / "\n" => skip { also_source: {
			const struct pair keyword = pair(s0, s1);
			const union line64 key = { { (uint32_t)line, date } };
			size_t i, *pi;
			if(line > UINT32_MAX)
				{ errno = ERANGE; fail = "too many lines of text"; goto catch; }
			if(!(i = pair_map_table_get(&scan->sources.map, keyword)))
				{ fail = "keyword not introduced"; goto catch; }
			switch(linemap_tree_bulk_assign(&scan->sources.dates, key, &pi)) {
			case TREE_PRESENT: fail = "duplicate"; /* Fall through. */
			case TREE_ERROR: goto catch;
			case TREE_ABSENT: *pi = i; break;
			}
			date32_to_string(date, &datestr);
			fprintf(stderr, "%s: source <<%.*s>>\n",
				datestr, (int)(s1 - s0), s0);
		} continue; }
		/* New source. fixme: desc not set. */
		<source> @s0 keyword @s1 ":" [^\x00\n]+ / "\n" => skip {
			struct pair keyword = pair(s0, s1);
			size_t *idx;
			struct source *source;
			switch(pair_map_table_assign(&scan->sources.map, keyword, &idx)) {
			case TABLE_PRESENT: errno = EDOM; fail = "new keyword already used";
				/* Fall through. */
			case TABLE_ERROR: goto catch; /* /\ _Sic_. */
			case TABLE_ABSENT: *idx = 0; break; /* Good. */
			}
			if(!(source = source_array_new(&scan->sources.array))) goto catch;
			*idx = (size_t)(source - scan->sources.array.data);
			source->name.a = s0, source->name.b = s1;
			source->desc.a = 0, source->desc.b = 0;
			date32_to_string(date, &datestr);
			fprintf(stderr, "%s: new source <<%.*s>> stored in list at %zu.\n",
				datestr, (int)(s1 - s0), s0, *idx);
			goto also_source;
		}

		<score> * { fail = "score unrecognized"; goto catch; }
		/* Already there. Use the map to get the index from the keyword and
		 then stick a marker in the tree with that index. */
		<score> @s0 keyword @s1 / "\n" => skip { new_score: {
			const struct pair keyword = pair(s0, s1);
			const union line64 key = { { (uint32_t)line, date } };
			size_t idx, *pidx;
			if(line > UINT32_MAX)
				{ errno = ERANGE; fail = "too many lines of text"; goto catch; }
			if(!(idx = pair_map_table_get(&scan->scores.map, keyword)))
				{ fail = "keyword not introduced"; goto catch; }
			if(scan->scores.array.data[idx].last.u32 >= date.u32)
				{ fail = "duplicate key in same day"; goto catch; }
			scan->scores.array.data[idx].last.u32 = date.u32;
			switch(linemap_tree_bulk_assign(&scan->scores.dates, key, &pidx)) {
			case TREE_PRESENT: assert(0); fail = "duplicate key"; /* _Sic_. */
				/* Fall through. */
			case TREE_ERROR: goto catch;
			case TREE_ABSENT: *pidx = idx; break;
			}
			date32_to_string(date, &datestr);
			fprintf(stderr, "%s: score <%.*s>\n", datestr, (int)(s1 - s0), s0);
		} continue; }
		/* New score. The condition stays `score_name` across the jump to
		 `new_score`, so the name, date, and edges fields are read next. */
		<score> @s0 keyword @s1 ":" => score_name {
			size_t *idx;
			assert(!new_score);
			/* Create a new mapping from dateline to scores array. */
			switch(pair_map_table_assign(&scan->scores.map,
				pair(s0, s1), &idx)) {
			case TABLE_PRESENT: errno = EDOM; fail = "new keyword already used";
				/* Fall through. */
			case TABLE_ERROR: goto catch; /* _Sic_. */
			case TABLE_ABSENT: *idx = 0; break;
			}
			/* New entry in the scores array for this map to point to. */
			if(!(new_score = score_array_new(&scan->scores.array))) goto catch;
			*idx = (size_t)(new_score - scan->scores.array.data); /* Offset. */
			/*struct pair key, name; union date32 date, last; unsigned edges;*/
			new_score->key.a = s0, new_score->key.b = s1;
			new_score->name.a = 0, new_score->name.b = 0;
			new_score->date.u32 = new_score->last.u32 = 0;
			new_score->edges = 0, new_score->score = 0;
			date32_to_string(date, &datestr);
			fprintf(stderr, "%s: new score <%.*s> stored in list at %zu.\n",
				datestr, (int)(s1 - s0), s0, *idx);
			goto new_score;
		}
		<score_name> * { fail = "name unrecognized"; goto catch; }
		<score_name> ws* @s0 semitext+ (" " semitext+)* @s1 /* ws* */ ";"
			=> score_date {
			assert(new_score);
			new_score->name.a = s0, new_score->name.b = s1;
			continue; /* Without this, control runs into the next state. */
		}
		<score_date> * { fail = "date unrecognized"; goto catch; }
		<score_date> ws* "~"? @s0 date ws* ";" => score_edges {
			assert(new_score);
			if(!pair_to_date(s0, &new_score->date)) goto catch;
			continue; /* Without this, control runs into the next state. */
		}
		<score_edges> * { fail = "edges unrecognized"; goto catch; }
		<score_edges> ws* "~"? @s0 uint @s1 ws* / "\n" => skip {
			assert(new_score);
			if(!pair_to_natural(s0, s1, &new_score->edges)) goto catch;
			new_score = 0; /* Done. */
			continue;
		}

		/* type, reg, launch, how, height, landing, pilot, dual, instr, remarks
		 eg, [glider] 2-33A; C-GCLK; CYQQ; A; 2000'; CYQQ; ;:13;; Peters D1 */
		<glider_type> * { fail = "glider type"; goto catch; }
		<glider_type> ws* @s0 semitext+ @s1 ws* ";" => glider_reg {
			const union line64 key
				= {{ (uint32_t)line, {{ date.day, date.month, date.year }} }};
			assert(!new_glider);
			if(line > UINT32_MAX) { fail = "line overflow"; goto catch; }
			switch(glider_tree_bulk_assign(&scan->gliders, key, &new_glider)) {
			case TREE_PRESENT: fail = "duplicate"; /* Fall through. */
			case TREE_ERROR: goto catch;
			case TREE_ABSENT: break;
			}
			new_glider->type.a = s0, new_glider->type.b = s1;
			continue;
		}
		<glider_reg> * { fail = "glider reg"; goto catch; }
		<glider_reg> ws* @s0 semitext+ @s1 ws* ";" => glider_launch
			{ new_glider->reg.a = s0, new_glider->reg.b = s1; continue; }
		<glider_launch> * { fail = "glider launch"; goto catch; }
		<glider_launch> ws* @s0 airport @s1 ws* ";" => glider_how
			{ new_glider->launch.a = s0, new_glider->launch.b = s1; continue; }
		<glider_how> * { fail = "glider how"; goto catch; }
		<glider_how> ws* @s0 [MWA] ws* ";" => glider_height {
			switch(*s0) {
			case 'M': new_glider->how = MotorCarTow; break;
			case 'W': new_glider->how = Winch; break;
			case 'A': new_glider->how = AeroTow; break;
			}
			continue;
		}
		<glider_height> * { fail = "glider height"; goto catch; }
		<glider_height> ws* @s0 natural @s1 "'" ws* ";" => glider_landing {
			/* A failed conversion is an error, as it is for the other numeric
			 fields, instead of leaving the height unset. */
			if(!pair_to_natural(s0, s1, &new_glider->height_ft))
				{ fail = "glider height"; goto catch; }
			continue;
		}
		<glider_landing> * { fail = "glider landing"; goto catch; }
		<glider_landing> ws* @s0 airport @s1 ws* ";" => glider_pilot
			{ new_glider->landing.a = s0, new_glider->landing.b = s1; continue; }
		<glider_pilot> * { fail = "glider pilot"; goto catch; }
		<glider_pilot> ws* ";" => glider_dual /* not PIC */
			{ new_glider->pilot_min = 0; continue; }
		<glider_pilot> ws* @s0 natural? @s1 ":" @t0 minutes @t1 ws* ";"
			=> glider_dual { if(!pair_colon_to_minutes(s0, s1, t0, t1,
			&new_glider->pilot_min)) { fail = "pilot time"; goto catch; }
			continue; }
		<glider_dual> * { fail = "glider dual"; goto catch; }
		<glider_dual> ws* ";" => glider_instr
			{ new_glider->dual_min = 0; continue; }
		<glider_dual> ws* @s0 natural? @s1 ":" @t0 minutes @t1 ws* ";"
			=> glider_instr { if(!pair_colon_to_minutes(s0, s1, t0, t1,
			&new_glider->dual_min)) { fail = "dual time"; goto catch; }
			continue; }
		<glider_instr> * { fail = "glider instr"; goto catch; }
		<glider_instr> ws* ";" => glider_remarks
			{ new_glider->instr_min = 0; continue; }
		/* NOTE(review): the pilot and dual fields above convert the same
		 `h:mm` pattern with `pair_colon_to_minutes`; this one calls
		 `pair_hours_to_minutes`. Confirm which is intended. */
		<glider_instr> ws* @s0 natural? @s1 ":" @t0 minutes @t1 ws* ";"
			=> glider_remarks { if(!pair_hours_to_minutes(s0, s1, t0, t1,
			&new_glider->instr_min)) { fail = "instr time"; goto catch; }
			continue; }
		<glider_remarks> * { fail = "glider remarks"; goto catch; }
		<glider_remarks> ws* "\n" => line
			{ new_glider->remarks.a = new_glider->remarks.b = 0;
			new_glider = 0; line++; continue; }
		<glider_remarks> ws* @s0 glyph+ (" " glyph+)* @s1 "\n" => line
			{ new_glider->remarks.a = s0, new_glider->remarks.b = s1;
			new_glider = 0; line++; continue; }
	*/ }
	assert(0); /* Never gets here. */
catch:
	if(!errno) errno = EILSEQ;
	date32_to_string(date, &datestr);
	fprintf(stderr, "%s line %zu: %s condition %d.\n", datestr, line, fail, condition);
	return 0;
}
/** Destructor: releases every container of `scan` that scanning fills -- the
 source, score, and glider ones. Previously only the score containers were
 released, leaking the rest.
 @param[scan] If null, does nothing. */
void scan_(struct scan *const scan) {
	if(!scan) return;
	/* Sources. */
	linemap_tree_(&scan->sources.dates);
	pair_map_table_(&scan->sources.map);
	source_array_(&scan->sources.array);
	/* Scores. */
	linemap_tree_(&scan->scores.dates);
	pair_map_table_(&scan->scores.map);
	score_array_(&scan->scores.array);
	/* Gliders. */
	glider_tree_(&scan->gliders);
}
/** Scans every day of `jrnl` into a new `struct scan`, then finishes the
 bulk-loaded trees so they can be queried and iterated. A summary of the scores
 is printed to `stderr`.
 @param[jrnl] Must be constant throughout the use of the returned value.
 @return The scan. If anything fails, <fn:scan_> has been called on the value
 before it is returned. NOTE(review): how a caller tells failure from success
 is not visible here, presumably by `errno` or an empty result -- confirm. */
struct scan scan(struct journal *const jrnl) {
	struct scan scan = {0};
	struct journal_iterator it;
	union date32 date;
	const char *text;
	assert(jrnl);
	/* Null is the first item for convenience, (TABLE_DEFAULT). */
	{
		struct source *nul;
		if(!(nul = source_array_new(&scan.sources.array))) goto catch;
		*nul = (struct source){0};
	}
	{
		struct score *nul;
		if(!(nul = score_array_new(&scan.scores.array))) goto catch;
		*nul = (struct score){0};
	}
	/* Scan all. */
	it = journal_iterator(jrnl);
	while(journal_next(&it, &date, &text))
		if(!scan_day(&scan, date, text)) goto catch;
	/* Scans make trees bulk-loaded; fix to real tree. The glider tree is
	 bulk-loaded by the scan as well, so it is finished with the others. */
	if(!linemap_tree_bulk_finish(&scan.sources.dates)
		|| !linemap_tree_bulk_finish(&scan.scores.dates)
		|| !glider_tree_bulk_finish(&scan.gliders)) goto catch;
	fprintf(stderr, "List of scores: %s.\n"
		"Mapped to indices: %s.\n"
		"Date-line tree: %s.\n",
		score_array_to_string(&scan.scores.array),
		pair_map_table_to_string(&scan.scores.map),
		linemap_tree_to_string(&scan.scores.dates));
	goto finally;
catch:
	scan_(&scan);
finally:
	return scan;
}
/** Lookup the last source in `range` in sources `s`. They are invalidated on
adding a source, (currently fine because we get all at once.) */
static const struct source *source_lookup(struct scan *const scan,
const union line64 x) {
struct linemap_tree_iterator it;
assert(scan);
it = linemap_tree_less(&scan->sources.dates, x);
/* If it's before all elements of the journal or is not on the same date as
the source, this has no source, which is `array[0]`. */
return scan->sources.array.data + (linemap_tree_has_element(&it)
&& x.date.u32 == linemap_tree_key(&it).date.u32
? *linemap_tree_value(&it) : 0);
}
/** Prints to `stdout` a gnuplot script, with output set to `score.png`, whose
 inline data has one row per scored date-line: the date, the score's key, and
 the running count for that key. Every score's count in `scan` is reset to
 zero first and then incremented as the rows are printed.
 @param[scan] Must be non-null; its `score` counters are modified. */
void scan_score_graph(struct scan *const scan) {
	struct linemap_tree_iterator it;
	union line64 line;
	struct score *score;
	/* Checked before anything reads `scan`; it used to come after the iterator
	 had already been built from it. */
	assert(scan);
	it = linemap_tree_iterator(&scan->scores.dates);
	/* Set score to zero to verify count with paper journal. */
	for(struct score *i = scan->scores.array.data,
		*const z = i + scan->scores.array.size; i < z; i++) i->score = 0;
	/* 840 with legend; only useful to me. */
	printf("set terminal pngcairo dashed transparent truecolor"
		" size 840, 480 fontscale 1\n"
		"set output \"score.png\"\n");
	printf("$Data <<EOD\n"
		"# date, key, key score\n");
	while(linemap_tree_next(&it)) {
		line = linemap_tree_key(&it);
		assert(*linemap_tree_value(&it) < scan->scores.array.size);
		score = scan->scores.array.data + *linemap_tree_value(&it);
		char datestr[12];
		date32_to_string(line.date, &datestr);
		score->score++;
		printf("%s, %.*s, %u\n", datestr,
			(int)(score->key.b - score->key.a), score->key.a, score->score);
	}
	printf("EOD\n"
		"# theozh https://stackoverflow.com/a/75466214/2472827\n"
		"# get a unique list from datablock\n"
		"addToList(list,col) = list.( strstrt(list,'\"'.strcol(col).'\"') \\\n"
		"	> 0 ? '' : ' \"'.strcol(col).'\"')\n"
		"Uniqs = ''\n"
		"stats $Data u (Uniqs=addToList(Uniqs,2)) nooutput\n"
		"Uniq(i) = word(Uniqs,i)\n"
		"getIndex(s) = sum [_i=1:words(Uniqs)] s eq word(Uniqs,_i) ? _i : 0\n"
		"\n"
		/*"stats $Data u 3 nooutput\n"
		"cumsum = STATS_sum\n"
		"stats $Data u 4 nooutput\n"
		"setsum = STATS_sum\n"
		"\n"*/
		"myTimeFmt = \"%%Y-%%m-%%d\"\n"
		"set format x myTimeFmt timedate\n"
		"set xtics format myTimeFmt rotate by -30\n"
		"set ylabel \"happiness and satisfaction CDF (days)\"\n"
		"set grid\n"
		"set key out reverse Left noautotitle\n"
		"set style fill solid 0.5\n"
		"unset border\n"
		"unset key\n"
		"set autoscale xfix # max? hack: can't get x to extend further\n"
		"\n"
		/*"set label sprintf(\"%%u cumulative words (duplicate verses counted)\","
		" cumsum) center at graph 0.5, first cumsum*100/%zu offset 0,0.5\n"
		"set label sprintf(\"%%u unique KJV verse words memorized\", setsum) "
		"center at graph 0.5, first setsum*100/%zu offset 0,0.5\n"
		"\n"*/
		"plot \\\n"
		" total=0 $Data u"
		" (timecolumn(1,myTimeFmt)):(total=total+1) \\\n"
		" w steps lc \"black\" dt 1 lw 1, \\\n"
		" total=0 '' u \\\n"
		" (timecolumn(1,myTimeFmt)): \\\n"
		" (total=total+1,total/2.): \\\n"
		" (43200): \\\n"
		" (total/2.): \\\n"
		" (getIndex(strcol(2))) w boxxy lc var lw 1, \\\n"
		" for [i=1:words(Uniqs)] keyentry w boxxy lc i ti Uniq(i)\n");
}
#include <inttypes.h>
void scan_glider_graph(struct scan *const scan) {
assert(scan);
fprintf(stderr, "Glider: %s.\n", glider_tree_to_string(&scan->gliders));
printf("set terminal pngcairo dashed transparent truecolor"
" size 840, 480 fontscale 1\n"
"set output \"glider.png\"\n");
/*printf("set terminal cairolatex standalone pdf size 16cm,10.5cm"
" dashed transparent\n"
"set output \"flight.tex\"\n");*/
/*printf("set term postscript eps enhanced\n"
"set output \"flighthours.eps\"\n");*/
printf("$Data <<EOD\n"
"# date, reg, sic, pic, source\n");
struct glider_tree_iterator it = glider_tree_iterator(&scan->gliders);
while(glider_tree_next(&it)) {
const union line64 line = glider_tree_key(&it);
const struct glider *glider = glider_tree_value(&it);
char datestr[12];
date32_to_string(line.date, &datestr);
const struct source *src = source_lookup(scan, line);
assert(src);
if(!src->name.a) { fprintf(stderr,
"Glider has no source at %s; ignoring.\n", datestr); continue; }
printf("%s, ", datestr);
printf("%.*s, %" PRIu32 ", %" PRIu32,
(int)(glider->reg.b - glider->reg.a), glider->reg.a,
glider->dual_min, glider->pilot_min + glider->instr_min);
/* case POWER:
printf("%.*s, %" PRIu32 ", %" PRIu32,
(int)(flight->power.reg.b - flight->power.reg.a),
flight->power.reg.a,
flight->power.dual_min,
flight->power.pilot_min);
break; */
printf(", %.*s\n", (int)(src->name.b - src->name.a), src->name.a);
}
printf("EOD\n"
"# theozh https://stackoverflow.com/a/75466214/2472827\n"
"# get a unique list from datablock\n"
"addToList(list,col) = list.( strstrt(list,'\"'.strcol(col).'\"') \\\n"
" > 0 ? '' : ' \"'.strcol(col).'\"')\n"
"Uniqs = ''\n"
"stats $Data u (Uniqs=addToList(Uniqs,2)) nooutput\n"
"Uniq(i) = word(Uniqs,i)\n"
"getIndex(s) = sum [_i=1:words(Uniqs)] s eq word(Uniqs,_i) ? _i : 0\n"
"\n"
"stats $Data u 3 nooutput\n"
"sicsum = STATS_sum\n"
"stats $Data u 4 nooutput\n"
"picsum = STATS_sum\n"
"\n"
"myTimeFmt = \"%%Y-%%m-%%d\"\n"
"set format x myTimeFmt timedate\n"
"set xtics format myTimeFmt rotate by -30\n"
"set format y \"%%tH:%%tM\" timedate\n"
"set grid\n"
"set key out reverse Left noautotitle\n"
"set style fill solid 0.5\n"
"unset border\n"
"plot total=0 $Data u"
" (timecolumn(1,myTimeFmt)):(dy=($3+$4)*60,total=total+dy)"
" w steps lc \"black\" dt 3, \\\n"
" total=0 '' u (timecolumn(1,myTimeFmt)):"
"(dy=($3+$4)*60,total=total+dy,total/2.): \\\n"
" (43200):(total/2.):(getIndex(strcol(2))) w boxxy lc var, \\\n"
" for [i=1:words(Uniqs)] keyentry w boxxy lc i ti Uniq(i)\n"
/*"set xrange [*:'2001-09-11']\n"*/
/*"#set style fill solid 0.1 #pattern 5 (better, but restarts)\n"
"plot $Data using 1:($6/60) with fillsteps lw 2\n"*/);
}