/* Review notes for this patch (a flattened unified diff; comments were added
   on added C lines only, no code token was changed).
   - Makefile: links build/scan.o into bin/scan and lists build/scan.c as a
     secondary (kept) intermediate.
   - src/driver.c: after loading the journal, builds the scores, reports them
     on stderr and emits the graph script.
   - src/journal.re.c: splits the offset-to-pointer fix-up into two
     comma-sequenced assignments.
   - src/pair.h: removes the commented-out pair_djb2 prototype.
   - src/scan.h and src/scan.re.c: new score scanner built on re2c.
   NOTE(review): several spans look truncated in this copy (an `#include` with
   no header name, the re2c rules with no condition prefix, and the `$Data`
   printf in scores_graph); text between angle brackets appears to be missing,
   so verify against the repository before applying. */
diff --git a/Makefile b/Makefile index a16af21..850f6e2 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ bin/test-kjv: build/text.o build/pair.o build/kjvcite.o build/test_kjv.o bin/kjv: build/text.o build/pair.o build/journal.o build/kjvcite.o build/kjv.o build/source.o bin/flight: build/text.o build/pair.o build/journal.o build/source.o build/flights.o build/flighthours.o bin/score: build/text.o build/pair.o build/journal.o build/scores.o -bin/scan: build/text.o build/journal.o build/pair.o build/driver.o +bin/scan: build/text.o build/journal.o build/pair.o build/scan.o build/driver.o bin/%: @echo "\033[1;36mlinking $@\033[0m" @@ -73,7 +73,7 @@ build/%.c: src/%.re.c # # https://github.com/neil-edelman/cdoc documentation # -cdoc -o $@ $< -.SECONDARY: build/kjv.c build/journal.c build/source.c build/scan_kjv.c build/flights.c build/kjvcite.c build/scores.c +.SECONDARY: build/kjv.c build/journal.c build/source.c build/scan_kjv.c build/flights.c build/kjvcite.c build/scores.c build/scan.c .PHONY: clean release test test: $(projects) diff --git a/src/driver.c b/src/driver.c index 245181d..11aec9a 100644 --- a/src/driver.c +++ b/src/driver.c @@ -6,6 +6,7 @@ @std C99 */ #include "journal.h" +#include "scores.h" /* NOTE(review): this patch adds src/scan.h, which declares struct scores; confirm scores.h is the intended header */ #include #include #include @@ -13,6 +14,7 @@ int main(void) { const char *intent = "start"; struct journal jrnl = {0}; + struct scores scrs = {0}; errno = 0; intent = "../journal"; @@ -20,10 +22,18 @@ int main(void) { fprintf(stderr, "Journal: %s.\n", journal_to_string(&jrnl)); if(errno) goto catch; + intent = "parse"; /* label that perror() prints if a step below jumps to catch */ + scrs = scores(&jrnl); + fprintf(stderr, "Scores: %s.\n", scores_to_string(&scrs)); + if(scores_is_empty(&scrs)) goto catch; /* scores() tears its result down on failure, which presumably leaves it empty */ + if(!scores_graph(&scrs)) goto catch; + + intent = 0; goto finally; catch: perror(intent); finally: + scores_(&scrs); journal_(&jrnl); return intent ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/journal.re.c b/src/journal.re.c index 065e53b..3a0aa04 100644 --- a/src/journal.re.c +++ b/src/journal.re.c @@ -214,7 +214,8 @@ struct journal journal(const char *const dir_journal) { convert all of offsets back to pointers. */ it = day_tree_iterator(&j.days); while(day_tree_next(&it)) - *(v.text = day_tree_value(&it)) = j.backing.data + *v.offset; + v.text = day_tree_value(&it), *v.text = j.backing.data + *v.offset; /* the comma operator sequences the store to v.text before the second expression reads through v.offset */ + goto finally; catch: fprintf(stderr, "On date: %s/%d-%.2d-%.2d.\n", diff --git a/src/pair.h b/src/pair.h index 38b3ee1..21eef23 100644 --- a/src/pair.h +++ b/src/pair.h @@ -4,6 +4,7 @@ #include struct pair { const char *a, *b; }; + struct pair pair(const char *const a, const char *const b); int pair_to_natural(const char *, const char *, uint32_t *); int pair_colon_to_minutes(const char *, const char *, @@ -14,8 +15,6 @@ int pair_is_equal(struct pair, struct pair); int pair_is_string(struct pair, const char *); #include "journal.h" /* date32 */ int pair_to_date(const char *a, union date32 *const d); -/*fixme -uint32_t pair_djb2(const struct pair p);*/ /* Supporting pair -> size_t for looking up in arrays. */ #define TABLE_NAME pairmap diff --git a/src/scan.h b/src/scan.h new file mode 100644 index 0000000..d0ba9b4 --- /dev/null +++ b/src/scan.h @@ -0,0 +1,33 @@ +#include "pair.h" /* pair */ +#include "journal.h" /* size_t, date32, line64 */ + +/* Map from line to index in array. */ +#define TREE_NAME linetoindex +#define TREE_KEY union line64 +#define TREE_VALUE size_t +#define TREE_HEAD +#include "../src/tree.h" + +/* Score array. 
*/ +struct score { /* one entry per score keyword; index 0 of the array is a reserved null entry */ + struct pair key, name; /* key: keyword span set by scan(); name: display-name span, null until scan() fills it */ + union date32 date, last/* update */; /* date: parsed by pair_to_date(); last: date of the most recent occurrence */ + unsigned edges, score/* update */; /* edges: parsed by pair_to_natural(); score: tally recomputed in scores_graph() */ +}; +#define ARRAY_NAME score +#define ARRAY_TYPE struct score +#define ARRAY_HEAD +#include "../src/array.h" + + +struct scores { /* everything scan() accumulates over a journal */ + struct score_array array; /* the scores themselves */ + struct pairmap_table map; /* keyword to index into array. NOTE(review): scan.re.c calls pair_map_table_ functions while pair.h sets TABLE_NAME pairmap; confirm the names agree */ + struct linetoindex_tree dates; /* (line, date) key to index into array */ +}; + +void scores_(struct scores *); /* releases the three containers; null is ignored */ +struct scores scores(struct journal *); /* scans every journal entry into a new struct scores */ +int scores_is_empty(const struct scores *); /* true for null or when the dates tree has no root node */ +int scores_graph(struct scores *); /* prints a gnuplot script to stdout; returns 1 */ +const char *scores_to_string(const struct scores *); /* string form of the score array */ diff --git a/src/scan.re.c b/src/scan.re.c new file mode 100644 index 0000000..fd5a484 --- /dev/null +++ b/src/scan.re.c @@ -0,0 +1,263 @@ +/** @license 2022 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). + + Scan journal. */ + +#include "../src/journal.h" +#include "../src/scan.h" +#include +#include + + +/* Tree mapping from date-line in the journal to indices in whatever array. */ +static void linetoindex_to_string(const union line64 line, const size_t *const u, + char (*const a)[12]) { (void)u; date32_to_string(line.date, a); } /* prints only the date half of the key; the value is ignored */ +static int linetoindex_compare(const union line64 a, const union line64 b) + { return a.u64 > b.u64; } /* non-zero when a orders after b on the combined 64-bit key */ +#define TREE_NAME linetoindex +#define TREE_KEY union line64 +#define TREE_VALUE size_t +#define TREE_COMPARE +#define TREE_TO_STRING +#define TREE_DEFAULT 0 +#define TREE_BODY +#include "../src/tree.h" + + +/* Score array, one of the endpoints to index tree. */ +static void score_to_string(const struct score *const s, + char (*const z)[12]) { /* copies at most 11 bytes of the name span into z, then terminates it */ + const char *a = s->name.a, *b; + char *y = *z; + b = s->name.b <= a + 11 ? 
s->name.b : a + 11; + while(a < b) *(y++) = *(a++); + *y = '\0'; +} +#define ARRAY_NAME score +#define ARRAY_TYPE struct score +#define ARRAY_TO_STRING +#define ARRAY_BODY +#include "../src/array.h" + + + + +/*!conditions:re2c*/ + /* Lexes one journal entry `buffer`, dated `date`, recording newly introduced and repeated score keywords into `scores`. Returns 1 from the rule that matches the terminating nul; otherwise prints the date, line and reason to stderr, sets errno (EILSEQ unless already set) and returns 0. */ +static int scan(union date32 date, const char *const buffer, + struct scores *const scores) { + const char *YYCURSOR = buffer, *YYMARKER, *yyt1, *yyt2, *s0, *s1; + enum YYCONDTYPE condition = yycline; + size_t line = 1; /* line counter within this buffer, starting at 1 */ + char datestr[12] = {0}; + const char *why = "unexpected"; + struct score *score = 0; /* score being defined across the name, date and edges rules; reset to 0 when done */ + assert(buffer && scores); + YYCURSOR = YYMARKER = yyt1 = buffer; + /*!re2c /**/ + re2c:define:YYCTYPE = char; + re2c:yyfill:enable = 0; + re2c:define:YYGETCONDITION = "condition"; + re2c:define:YYSETCONDITION = "condition = @@;"; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION:naked = 1; + + ws = [ \t]; + glyph = [^\x00-\x20\x7f]; // [^\x00\n\t ] + all weird + semitext = glyph \ ";"; + natural = [1-9][0-9]*; + uint = [0-9]+; + keyword = [A-Za-z0-9][A-Za-z0-9_-]*; + date = natural "-" [0-1][0-9] "-" [0-3][0-9]; + */ + for( ; ; ) { /*!re2c /**/ + /* Default ignore. */ + [^\n\x00] { continue; } + "\x00" { why = "no newline at end of file"; goto catch; } + "\x00" { return 1; } + "\n" => line { line++; continue; } + * :=> skip + "::" / [^:] :=> score + + * { why = "score unrecognized"; goto catch; } + /* Already there. Use the map to get the index from the keyword and + then stick a marker in the tree with that index. 
*/ + @s0 keyword @s1 / "\n" => skip { also_add_to_tree: { + const struct pair keyword = pair(s0, s1); + const union line64 key = { { (uint32_t)line, date } }; + size_t idx, *pidx; + if(line > UINT32_MAX) + { errno = ERANGE; why = "too many lines of text"; goto catch; } + if(!(idx = pair_map_table_get(&scores->map, keyword))) /* index 0 is the reserved null score, so a zero result is treated as an unknown keyword */ + { why = "keyword not introduced"; goto catch; } + if(scores->array.data[idx].last.u32 >= date.u32) + { why = "duplicate key in same day"; goto catch; } + scores->array.data[idx].last.u32 = date.u32; + switch(linetoindex_tree_bulk_try(&scores->dates, key, &pidx)) { + case TREE_PRESENT: assert(0); why = "duplicate key"; /* _Sic_. */ + case TREE_ERROR: goto catch; + case TREE_ABSENT: *pidx = idx; break; + } + date32_to_string(date, &datestr); + fprintf(stderr, "%s: <%.*s>\n", datestr, (int)(s1 - s0), s0); + continue; + } } + /* New score. */ + @s0 keyword @s1 ":" => score_name { + size_t *idx; + switch(pair_map_table_assign(&scores->map, pair(s0, s1), &idx)) { + case TABLE_PRESENT: errno = EDOM; why = "new keyword already used"; + case TABLE_ERROR: goto catch; /* _Sic_. */ + case TABLE_ABSENT: *idx = 0; break; + } + if(!(score = score_array_new(&scores->array))) goto catch; + *idx = (size_t)(score - scores->array.data); /* the map stores the position of the new element in the array */ + /*struct pair key, name; union date32 date, last; unsigned edges;*/ + score->key.a = s0, score->key.b = s1; + score->name.a = 0, score->name.b = 0; + score->date.u32 = score->last.u32 = 0; + score->edges = 0, score->score = 0; + date32_to_string(date, &datestr); + fprintf(stderr, "%s: new score <%.*s> stored in list at %zu.\n", + datestr, (int)(s1 - s0), s0, *idx); + goto also_add_to_tree; + } + * { why = "name unrecognized"; goto catch; } + * { why = "date unrecognized"; goto catch; } + * { why = "edges unrecognized"; goto catch; } + ws* @s0 semitext+ (" " semitext+)* @s1 /* ws* */ ";" + => score_date { + assert(score); + score->name.a = s0, score->name.b = s1; + } + ws* "~"? 
@s0 date ws* ";" => score_edges { + assert(score); + if(!pair_to_date(s0, &score->date)) goto catch; + } + ws* "~"? @s0 uint @s1 ws* / "\n" => skip { + assert(score); + if(!pair_to_natural(s0, s1, &score->edges)) goto catch; + score = 0; /* Done. */ + } + */ } + assert(0); /* Never gets here. */ +catch: + if(!errno) errno = EILSEQ; /* fallback errno when nothing more specific was set */ + date32_to_string(date, &datestr); + fprintf(stderr, "%s line %zu: %s.\n", datestr, line, why); + return 0; +} + /* Tears down the dates tree, the keyword map and the score array of `s`; a null `s` is ignored. */ +void scores_(struct scores *const s) { + if(!s) return; + linetoindex_tree_(&s->dates); + pair_map_table_(&s->map); + score_array_(&s->array); +} + /* Builds a struct scores from journal `j`: reserves array index 0 as a null score, then echoes each entry to stdout and runs scan() on it. On any failure the result is torn down with scores_() before being returned. */ +struct scores scores(struct journal *const j) { + struct scores s + = { score_array(), pair_map_table(), linetoindex_tree() }; + struct journal_iterator it; + union date32 date; + const char *text; + assert(j); + { /* Null is the first item for convenience, (TABLE_DEFAULT). */ + struct score *nul; + if(!(nul = score_array_new(&s.array))) goto catch; + nul->key.a = nul->key.b = nul->name.a = nul->name.b = 0; + nul->date.u32 = 0; + nul->edges = 0; /* NOTE(review): nul->last and nul->score are not set here; confirm nothing reads them for index 0 */ + } + it = journal_iterator(j); + while(journal_next(&it, &date, &text)) { + char a[12]; + date32_to_string(date, &a); + printf(": %s\n", a); + printf("<<%s>>\n", text); + if(!scan(date, text, &s)) goto catch; + } + fprintf(stderr, "List of scores: %s.\n" + "Mapped to indices: %s.\n" + "Date-line tree: %s.\n", score_array_to_string(&s.array), + pair_map_table_to_string(&s.map), linetoindex_tree_to_string(&s.dates)); + goto finally; +catch: + scores_(&s); /* failure path: tear down, then fall through and return the torn-down value */ +finally: + return s; +} + /* True when `s` is null or its dates tree has no root node. */ +int scores_is_empty(const struct scores *const s) + { return !s || !s->dates.root.node; } + /* Returns the string form of the score array; `s` must not be null (asserted). */ +const char *scores_to_string(const struct scores *const s) + { return assert(s), score_array_to_string(&s->array); } + /* Zeroes every score tally, then writes a gnuplot script to stdout whose output file is score.png; as written it always returns 1. NOTE(review): the data-block loop looks truncated in this copy. */ +int scores_graph(struct scores *const scrs) { + struct linetoindex_tree_iterator it = linetoindex_tree_iterator(&scrs->dates); + union line64 line; + struct score *score; + + /* Set score to zero to verify count with paper journal. 
*/ + for(size_t i = 0; i < scrs->array.size; i++) scrs->array.data[i].score = 0; + + /* 840 with legend; only useful to me. */ + printf("set terminal pngcairo dashed transparent truecolor" + " size 840, 480 fontscale 1\n" + "set output \"score.png\"\n"); + printf("$Data <array.data + *linetoindex_tree_value(&it); + char datestr[12]; + date32_to_string(line.date, &datestr); + score->score++; + printf("%s, %.*s, %u\n", datestr, + (int)(score->key.b - score->key.a), score->key.a, score->score); + } + printf("EOD\n" + "# theozh https://stackoverflow.com/a/75466214/2472827\n" + "# get a unique list from datablock\n" + "addToList(list,col) = list.( strstrt(list,'\"'.strcol(col).'\"') \\\n" + " > 0 ? '' : ' \"'.strcol(col).'\"')\n" + "Uniqs = ''\n" + "stats $Data u (Uniqs=addToList(Uniqs,2)) nooutput\n" + "Uniq(i) = word(Uniqs,i)\n" + "getIndex(s) = sum [_i=1:words(Uniqs)] s eq word(Uniqs,_i) ? _i : 0\n" + "\n" + /*"stats $Data u 3 nooutput\n" + "cumsum = STATS_sum\n" + "stats $Data u 4 nooutput\n" + "setsum = STATS_sum\n" + "\n"*/ + "myTimeFmt = \"%%Y-%%m-%%d\"\n" + "set format x myTimeFmt timedate\n" + "set xtics format myTimeFmt rotate by -30\n" + "set ylabel \"happiness CDF (days)\"\n" + "set grid\n" + "set key out reverse Left noautotitle\n" + "set style fill solid 0.5\n" + "unset border\n" + "unset key\n" + "set autoscale xfix # max? 
hack: can't get x to extend further\n" + "\n" + /*"set label sprintf(\"%%u cumulative words (duplicate verses counted)\"," + " cumsum) center at graph 0.5, first cumsum*100/%zu offset 0,0.5\n" + "set label sprintf(\"%%u unique KJV verse words memorized\", setsum) " + "center at graph 0.5, first setsum*100/%zu offset 0,0.5\n" + "\n"*/ + "plot \\\n" + " total=0 $Data u" + " (timecolumn(1,myTimeFmt)):(total=total+1) \\\n" + " w steps lc \"black\" dt 1 lw 1, \\\n" + " total=0 '' u \\\n" + " (timecolumn(1,myTimeFmt)): \\\n" + " (total=total+1,total/2.): \\\n" + " (43200): \\\n" + " (total/2.): \\\n" + " (getIndex(strcol(2))) w boxxy lc var lw 1, \\\n" + " for [i=1:words(Uniqs)] keyentry w boxxy lc i ti Uniq(i)\n"); + return 1; +}