/** @license 2022 Neil Edelman, distributed under the terms of the
 [MIT License](https://opensource.org/licenses/MIT).

 Scan journal. */
|
|
|
|
#include "../src/journal.h"
|
|
#include "../src/scan.h"
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
|
|
/* Tree mapping from date-line in the journal to indices in whatever array. */
|
|
static void linemap_to_string(const union line64 line, const size_t *const u,
|
|
char (*const a)[12]) { (void)u; date32_to_string(line.date, a); }
|
|
static int linemap_compare(const union line64 a, const union line64 b)
|
|
{ return a.u64 > b.u64; }
|
|
#define TREE_NAME linemap
|
|
#define TREE_KEY union line64
|
|
#define TREE_VALUE size_t
|
|
#define TREE_COMPARE
|
|
#define TREE_TO_STRING
|
|
#define TREE_DEFAULT 0
|
|
#define TREE_BODY
|
|
#include "../src/tree.h"
|
|
|
|
/* Array of sources. */
|
|
static void source_to_string(const struct source *const s,
|
|
char (*const z)[12]) {
|
|
const char *a = s->name.a, *b;
|
|
char *y = *z;
|
|
b = s->name.b <= a + 11 ? s->name.b : a + 11;
|
|
while(a < b) *(y++) = *(a++);
|
|
*y = '\0';
|
|
}
|
|
#define ARRAY_NAME source
|
|
#define ARRAY_TYPE struct source
|
|
#define ARRAY_TO_STRING
|
|
#define ARRAY_BODY
|
|
#include "../src/array.h"
|
|
|
|
/* Array of scores. */
|
|
static void score_to_string(const struct score *const s,
|
|
char (*const z)[12]) {
|
|
const char *a = s->name.a, *b;
|
|
char *y = *z;
|
|
b = s->name.b <= a + 11 ? s->name.b : a + 11;
|
|
while(a < b) *(y++) = *(a++);
|
|
*y = '\0';
|
|
}
|
|
#define ARRAY_NAME score
|
|
#define ARRAY_TYPE struct score
|
|
#define ARRAY_TO_STRING
|
|
#define ARRAY_BODY
|
|
#include "../src/array.h"
|
|
|
|
|
|
|
|
|
|
/*!conditions:re2c*/
|
|
|
|
/** Lexes the text of one journal entry and records every source line
 (introduced by "--") and score line (introduced by "::") in `scan`. Each
 record that is accepted is also logged to `stderr`.
 @param[scan] Receives the results; must be non-null (asserted).
 @param[date] Date of the entry; `date.u32` must be non-zero (asserted).
 @param[buffer] Text of the entry, ending in a nul character; must be non-null
 (asserted). Keywords and names are stored as pointers into this text, not
 copied.
 @return 1 when the terminating nul is met at the start of a line. 0 on any
 failure, after printing the date, the line number and a reason to `stderr`;
 `errno` is then non-zero (`EILSEQ` unless something else was already set). */
static int scan_day(struct scan *const scan, union date32 date,
|
|
const char *const buffer) {
|
|
/* Lexer position and backtrack marker; `yyt1`/`yyt2` are presumably the
 variables re2c's generated code uses for tags; `s0`/`s1` receive the `@s0`
 and `@s1` tag positions in the rules below. */
const char *YYCURSOR, *YYMARKER, *yyt1, *yyt2, *s0, *s1;
|
|
/* `yycline` is presumably the generated constant for the `<line>`
 condition, that is, "at the beginning of a line". */
enum YYCONDTYPE condition = yycline;
|
|
/* Line within this entry, starting at one; bumped on every "\n". */
size_t line = 1;
|
|
/* Holds the formatted date for the log and error messages. */
char datestr[12] = {0};
|
|
/* Reported at `catch` unless a more specific reason is assigned first. */
const char *fail = "perhaps a bat?";
|
|
/* The score being filled in across the score_name, score_date and
 score_edges conditions; null otherwise. */
struct score *score = 0;
|
|
assert(scan && date.u32 && buffer);
|
|
YYCURSOR = YYMARKER = yyt1 = buffer;
|
|
/*!re2c /**/
|
|
re2c:define:YYCTYPE = char;
|
|
re2c:yyfill:enable = 0;
|
|
re2c:define:YYGETCONDITION = "condition";
|
|
re2c:define:YYSETCONDITION = "condition = @@;";
|
|
re2c:define:YYGETCONDITION:naked = 1;
|
|
re2c:define:YYSETCONDITION:naked = 1;
|
|
|
|
ws = [ \t];
|
|
glyph = [^\x00-\x20\x7f]; // [^\x00\n\t ] + all weird
|
|
semitext = glyph \ ";";
|
|
natural = [1-9][0-9]*;
|
|
uint = [0-9]+;
|
|
keyword = [A-Za-z0-9][A-Za-z0-9_-]*;
|
|
date = natural "-" [0-1][0-9] "-" [0-3][0-9];
|
|
*/
|
|
/* Each pass matches one rule in the current condition. An action either
 `continue`s, falls out of the re2c block into the next pass, returns, or
 jumps to `catch`. */
for( ; ; ) {
|
|
/*!re2c /**/
|
|
/* Default ignore. */
|
|
<skip> [^\n\x00] { continue; }
|
|
<skip> "\x00" { fail = "no newline at end of file"; goto catch; }
|
|
<line> "\x00" { return 1; } /* End of day. */
|
|
<line, skip> "\n" => line { line++; continue; }
|
|
<line> * :=> skip
|
|
<line> "--" / [^-] :=> source
|
|
<line> "::" / [^:] :=> score
|
|
|
|
|
|
/* Sources. */
<source> * { fail = "source unrecognized"; goto catch; }
|
|
/* Already introduced: look the index up by keyword and store it in the
 sources tree under this date and line. Also entered by `goto` from the
 new-source rule below. */
<source> @s0 keyword @s1 / "\n" => skip { also_source: {
|
|
const struct pair keyword = pair(s0, s1);
|
|
const union line64 key = { { (uint32_t)line, date } };
|
|
size_t i, *pi;
|
|
if(line > UINT32_MAX)
|
|
{ errno = ERANGE; fail = "too many lines of text"; goto catch; }
|
|
/* Index zero is the placeholder, so a zero result means "unknown". */
if(!(i = pair_map_table_get(&scan->sources.map, keyword)))
|
|
{ fail = "keyword not introduced"; goto catch; }
|
|
switch(linemap_tree_try(&scan->sources.dates, key, &pi)) {
|
|
case TREE_PRESENT: fail = "duplicate key"; /* _Sic_. */
|
|
case TREE_ERROR: goto catch;
|
|
case TREE_ABSENT: *pi = i; break;
|
|
}
|
|
date32_to_string(date, &datestr);
|
|
fprintf(stderr, "%s: source <<%.*s>>\n",
|
|
datestr, (int)(s1 - s0), s0);
|
|
continue;
|
|
} }
|
|
/* New source. fixme: desc not set. */
|
|
<source> @s0 keyword @s1 ":" [^\x00\n]+ / "\n" => skip {
|
|
struct pair keyword = pair(s0, s1);
|
|
size_t *idx;
|
|
struct source *source;
|
|
switch(pair_map_table_assign(&scan->sources.map, keyword, &idx)) {
|
|
case TABLE_PRESENT: errno = EDOM; fail = "new keyword already used";
|
|
case TABLE_ERROR: goto catch; /* /\ _Sic_. */
|
|
case TABLE_ABSENT: *idx = 0; break; /* Good. */
|
|
}
|
|
if(!(source = source_array_new(&scan->sources.array))) goto catch;
|
|
/* The map stores the new record's offset within the array. */
*idx = (size_t)(source - scan->sources.array.data);
|
|
source->name.a = s0, source->name.b = s1;
|
|
source->desc.a = 0, source->desc.b = 0;
|
|
date32_to_string(date, &datestr);
|
|
fprintf(stderr, "%s: new source <<%.*s>> stored in list at %zu.\n",
|
|
datestr, (int)(s1 - s0), s0, *idx);
|
|
/* Then record this first use like any other. */
goto also_source;
|
|
}
|
|
|
|
|
|
<score> * { fail = "score unrecognized"; goto catch; }
|
|
/* Already there. Use the map to get the index from the keyword and
|
|
then stick a marker in the tree with that index. */
|
|
<score> @s0 keyword @s1 / "\n" => skip { also_score: {
|
|
const struct pair keyword = pair(s0, s1);
|
|
const union line64 key = { { (uint32_t)line, date } };
|
|
size_t idx, *pidx;
|
|
if(line > UINT32_MAX)
|
|
{ errno = ERANGE; fail = "too many lines of text"; goto catch; }
|
|
if(!(idx = pair_map_table_get(&scan->scores.map, keyword)))
|
|
{ fail = "keyword not introduced"; goto catch; }
|
|
/* `last` holds the most recent date this score was recorded on; a date
 that is not strictly later is refused. */
if(scan->scores.array.data[idx].last.u32 >= date.u32)
|
|
{ fail = "duplicate key in same day"; goto catch; }
|
|
scan->scores.array.data[idx].last.u32 = date.u32;
|
|
switch(linemap_tree_bulk_try(&scan->scores.dates, key, &pidx)) {
|
|
case TREE_PRESENT: assert(0); fail = "duplicate key"; /* _Sic_. */
|
|
case TREE_ERROR: goto catch;
|
|
case TREE_ABSENT: *pidx = idx; break;
|
|
}
|
|
date32_to_string(date, &datestr);
|
|
fprintf(stderr, "%s: score <%.*s>\n", datestr, (int)(s1 - s0), s0);
|
|
continue;
|
|
} }
|
|
/* New score. */
|
|
<score> @s0 keyword @s1 ":" => score_name {
|
|
size_t *idx;
|
|
switch(pair_map_table_assign(&scan->scores.map,
|
|
pair(s0, s1), &idx)) {
|
|
case TABLE_PRESENT: errno = EDOM; fail = "new keyword already used";
|
|
case TABLE_ERROR: goto catch; /* _Sic_. */
|
|
case TABLE_ABSENT: *idx = 0; break;
|
|
}
|
|
if(!(score = score_array_new(&scan->scores.array))) goto catch;
|
|
*idx = (size_t)(score - scan->scores.array.data); /* Offset. */
|
|
/*struct pair key, name; union date32 date, last; unsigned edges;*/
|
|
score->key.a = s0, score->key.b = s1;
|
|
score->name.a = 0, score->name.b = 0;
|
|
score->date.u32 = score->last.u32 = 0;
|
|
score->edges = 0, score->score = 0;
|
|
date32_to_string(date, &datestr);
|
|
fprintf(stderr, "%s: new score <%.*s> stored in list at %zu.\n",
|
|
datestr, (int)(s1 - s0), s0, *idx);
|
|
/* Record this first use as well; the condition is already score_name, so
 the `continue` there resumes with the name field. */
goto also_score;
|
|
}
|
|
/* The remaining fields of a new score: name; date; edges. */
<score_name> * { fail = "name unrecognized"; goto catch; }
|
|
<score_date> * { fail = "date unrecognized"; goto catch; }
|
|
<score_edges> * { fail = "edges unrecognized"; goto catch; }
|
|
<score_name> ws* @s0 semitext+ (" " semitext+)* @s1 /* ws* */ ";"
|
|
=> score_date {
|
|
assert(score);
|
|
score->name.a = s0, score->name.b = s1;
|
|
}
|
|
<score_date> ws* "~"? @s0 date ws* ";" => score_edges {
|
|
assert(score);
|
|
if(!pair_to_date(s0, &score->date)) goto catch;
|
|
}
|
|
<score_edges> ws* "~"? @s0 uint @s1 ws* / "\n" => skip {
|
|
assert(score);
|
|
if(!pair_to_natural(s0, s1, &score->edges)) goto catch;
|
|
score = 0; /* Done. */
|
|
}
|
|
|
|
|
|
*/ }
|
|
assert(0); /* Never gets here. */
|
|
catch:
|
|
/* Keep a specific `errno` if one is set; otherwise call it an illegal
 sequence. */
if(!errno) errno = EILSEQ;
|
|
date32_to_string(date, &datestr);
|
|
fprintf(stderr, "%s line %zu: %s.\n", datestr, line, fail);
|
|
return 0;
|
|
}
|
|
|
|
void scan_(struct scan *const scan) {
|
|
if(!scan) return;
|
|
linemap_tree_(&scan->scores.dates);
|
|
pair_map_table_(&scan->scores.map);
|
|
score_array_(&scan->scores.array);
|
|
}
|
|
|
|
/** @param[jrnl] Must be constant throughout the use of the returned value. */
|
|
struct scan scan(struct journal *const jrnl) {
|
|
struct scan scan = {0};
|
|
struct journal_iterator it;
|
|
union date32 date;
|
|
const char *text;
|
|
assert(jrnl);
|
|
|
|
/* Null is the first item for convenience, (TABLE_DEFAULT). */
|
|
{
|
|
struct source *nul;
|
|
if(!(nul = source_array_new(&scan.sources.array))) goto catch;
|
|
*nul = (struct source){0};
|
|
}
|
|
{
|
|
struct score *nul;
|
|
if(!(nul = score_array_new(&scan.scores.array))) goto catch;
|
|
*nul = (struct score){0};
|
|
}
|
|
|
|
/* Scan all. */
|
|
it = journal_iterator(jrnl);
|
|
while(journal_next(&it, &date, &text))
|
|
if(!scan_day(&scan, date, text)) goto catch;
|
|
fprintf(stderr, "List of scores: %s.\n"
|
|
"Mapped to indices: %s.\n"
|
|
"Date-line tree: %s.\n",
|
|
score_array_to_string(&scan.scores.array),
|
|
pair_map_table_to_string(&scan.scores.map),
|
|
linemap_tree_to_string(&scan.scores.dates));
|
|
goto finally;
|
|
catch:
|
|
scan_(&scan);
|
|
finally:
|
|
return scan;
|
|
}
|
|
|
|
int scan_scores_graph(struct scan *const scan) {
|
|
struct scores *const scrs = &scan->scores;
|
|
struct linemap_tree_iterator it = linemap_tree_iterator(&scrs->dates);
|
|
union line64 line;
|
|
struct score *score;
|
|
|
|
/* Set score to zero to verify count with paper journal. */
|
|
for(size_t i = 0; i < scrs->array.size; i++) scrs->array.data[i].score = 0;
|
|
|
|
/* 840 with legend; only useful to me. */
|
|
printf("set terminal pngcairo dashed transparent truecolor"
|
|
" size 840, 480 fontscale 1\n"
|
|
"set output \"score.png\"\n");
|
|
printf("$Data <<EOD\n"
|
|
"# date, key, key score\n");
|
|
while(linemap_tree_next(&it)) {
|
|
line = linemap_tree_key(&it);
|
|
score = scrs->array.data + *linemap_tree_value(&it);
|
|
char datestr[12];
|
|
date32_to_string(line.date, &datestr);
|
|
score->score++;
|
|
printf("%s, %.*s, %u\n", datestr,
|
|
(int)(score->key.b - score->key.a), score->key.a, score->score);
|
|
}
|
|
printf("EOD\n"
|
|
"# theozh https://stackoverflow.com/a/75466214/2472827\n"
|
|
"# get a unique list from datablock\n"
|
|
"addToList(list,col) = list.( strstrt(list,'\"'.strcol(col).'\"') \\\n"
|
|
" > 0 ? '' : ' \"'.strcol(col).'\"')\n"
|
|
"Uniqs = ''\n"
|
|
"stats $Data u (Uniqs=addToList(Uniqs,2)) nooutput\n"
|
|
"Uniq(i) = word(Uniqs,i)\n"
|
|
"getIndex(s) = sum [_i=1:words(Uniqs)] s eq word(Uniqs,_i) ? _i : 0\n"
|
|
"\n"
|
|
/*"stats $Data u 3 nooutput\n"
|
|
"cumsum = STATS_sum\n"
|
|
"stats $Data u 4 nooutput\n"
|
|
"setsum = STATS_sum\n"
|
|
"\n"*/
|
|
"myTimeFmt = \"%%Y-%%m-%%d\"\n"
|
|
"set format x myTimeFmt timedate\n"
|
|
"set xtics format myTimeFmt rotate by -30\n"
|
|
"set ylabel \"happiness and satisfaction CDF (days)\"\n"
|
|
"set grid\n"
|
|
"set key out reverse Left noautotitle\n"
|
|
"set style fill solid 0.5\n"
|
|
"unset border\n"
|
|
"unset key\n"
|
|
"set autoscale xfix # max? hack: can't get x to extend further\n"
|
|
"\n"
|
|
/*"set label sprintf(\"%%u cumulative words (duplicate verses counted)\","
|
|
" cumsum) center at graph 0.5, first cumsum*100/%zu offset 0,0.5\n"
|
|
"set label sprintf(\"%%u unique KJV verse words memorized\", setsum) "
|
|
"center at graph 0.5, first setsum*100/%zu offset 0,0.5\n"
|
|
"\n"*/
|
|
"plot \\\n"
|
|
" total=0 $Data u"
|
|
" (timecolumn(1,myTimeFmt)):(total=total+1) \\\n"
|
|
" w steps lc \"black\" dt 1 lw 1, \\\n"
|
|
" total=0 '' u \\\n"
|
|
" (timecolumn(1,myTimeFmt)): \\\n"
|
|
" (total=total+1,total/2.): \\\n"
|
|
" (43200): \\\n"
|
|
" (total/2.): \\\n"
|
|
" (getIndex(strcol(2))) w boxxy lc var lw 1, \\\n"
|
|
" for [i=1:words(Uniqs)] keyentry w boxxy lc i ti Uniq(i)\n");
|
|
return 1;
|
|
}
|