Works with text.

This commit is contained in:
Neil 2023-05-06 18:11:23 -07:00
parent 9c2edcb9eb
commit c096fbf605
2 changed files with 100 additions and 42 deletions

View File

@ -85,7 +85,7 @@ struct scan {
struct source_array array; struct source_array array;
struct pairmap_table map; struct pairmap_table map;
struct linemap_tree dates; struct linemap_tree dates;
} sources; } sources, documents;
struct { struct {
struct place_array array; struct place_array array;
struct pairmap_table map; struct pairmap_table map;
@ -96,7 +96,6 @@ struct scan {
struct pairmap_table map; struct pairmap_table map;
struct linemap_tree dates; struct linemap_tree dates;
} scores; } scores;
struct source_array documents;
struct glider_tree gliders; struct glider_tree gliders;
struct flight_tree flights; struct flight_tree flights;
struct kjv_tree kjvs; struct kjv_tree kjvs;

View File

@ -109,6 +109,7 @@ static int scan_day(struct scan *const scan, union date32 date,
size_t line = 1; size_t line = 1;
char datestr[12] = {0}; char datestr[12] = {0};
const char *fail = "perhaps a bat?"; const char *fail = "perhaps a bat?";
struct pair *future = 0;
struct score *new_score = 0; struct score *new_score = 0;
struct glider *new_glider = 0; struct glider *new_glider = 0;
struct flight *new_flight = 0; struct flight *new_flight = 0;
@ -129,12 +130,18 @@ static int scan_day(struct scan *const scan, union date32 date,
ws = [ \t]; ws = [ \t];
glyph = [^\x00-\x20\x7f] | [\x80-\xff]; // [^\x00\n\t ] + all weird glyph = [^\x00-\x20\x7f] | [\x80-\xff]; // [^\x00\n\t ] + all weird
semitext = glyph \ ";"; semichar = glyph \ ";";
labelchar = glyph \ [;]; // perhaps? brachar = semichar \ [[\]];
label_mbra = labelchar \ [[\]]; parchar = glyph \ [()];
label_mpar = labelchar \ [()];
label_in = label_mbra+ (" " label_mbra+)*; // "[label in; ...]" // label: possibly separated by spaces; used in freestanding last
label_out = (label_mpar labelchar*) (" " labelchar+)*; // "() label out" anylabel = glyph+ (ws+ glyph+)*;
// label except ";"; used in freestanding
semilabel = semichar+ (ws+ semichar+)*;
// label except ";[]"; used in []
bralabel = brachar+ (ws+ brachar+)*;
// label except start "()"; used in location
parlabel = (parchar glyph*) (ws+ glyph+)*;
keyword = [A-Za-z0-9][A-Za-z0-9_-]*; keyword = [A-Za-z0-9][A-Za-z0-9_-]*;
@ -165,24 +172,33 @@ static int scan_day(struct scan *const scan, union date32 date,
<line> "[flight]" :=> flight_type <line> "[flight]" :=> flight_type
<line> "[" :=> bracket <line> "[" :=> bracket
/* ^"[" ... */
<bracket> * { fail = "bracket unrecognized"; goto catch; } <bracket> * { fail = "bracket unrecognized"; goto catch; }
<bracket> "document: " :=> document_title <bracket> "document: " :=> document_title
// "<<\ntext\n>>" or "text\n" used by several.
<text> * { fail = "text"; goto catch; }
<text> ws+ :=> text
<text> glyph+ {
}
<document_title> * { fail = "document title"; goto catch; } <document_title> * { fail = "document title"; goto catch; }
<document_title> @s0 label_in @s1 "]" => text { <document_title> @s0 bralabel @s1 "]" => text_input {
const struct pair label = pair(s0, s1); const union line64 key = { { (uint32_t)line, date } };
fprintf(stderr, "document: <<%.*s>>?\n", (int)(s1 - s0), s0); size_t *pi;
struct source *doc;
fprintf(stderr, "document: <<%.*s>>\n", (int)(s1 - s0), s0);
if(!(doc = source_array_new(&scan->documents.array))) goto catch;
doc->name.a = s0, doc->name.b = s1;
doc->desc.a = 0, doc->desc.b = 0;
assert(!future), future = &doc->desc;
switch(linemap_tree_bulk_assign(&scan->documents.dates, key, &pi)) {
case TREE_PRESENT: fail = "duplicate"; case TREE_ERROR: goto catch;
case TREE_ABSENT:
*pi = (size_t)(doc - scan->documents.array.data); break;
}
fprintf(stderr, "%s[%zu]: new document <<%.*s>> stored at %zu.\n",
datestr, line, (int)(s1 - s0), s0, *pi);
assert(future);
continue; continue;
} }
<place> * { fail = "place unrecognized"; goto catch; } <place> * { fail = "place unrecognized"; goto catch; }
<place> @s0 label_out @s1 / "\n" => skip { also_place: { <place> @s0 parlabel @s1 / "\n" => skip { also_place: {
const struct pair keyword = pair(s0, s1); const struct pair keyword = pair(s0, s1);
const union line64 key = { { (uint32_t)line, date } }; const union line64 key = { { (uint32_t)line, date } };
size_t i, *pi; size_t i, *pi;
@ -200,7 +216,7 @@ static int scan_day(struct scan *const scan, union date32 date,
datestr, line, (int)(s1 - s0), s0); datestr, line, (int)(s1 - s0), s0);
} continue; } } continue; }
<place> "(" @t0 decimal "," @t1 decimal ")" <place> "(" @t0 decimal "," @t1 decimal ")"
ws+ @s0 label_out @s1 / "\n" => skip { ws+ @s0 parlabel @s1 / "\n" => skip {
const struct pair keyword = pair(s0, s1); const struct pair keyword = pair(s0, s1);
const double x = strtod(t0, 0), y = strtod(t1, 0); /* Safe? */ const double x = strtod(t0, 0), y = strtod(t1, 0); /* Safe? */
size_t *idx; size_t *idx;
@ -242,6 +258,7 @@ static int scan_day(struct scan *const scan, union date32 date,
const struct pair keyword = pair(s0, s1); const struct pair keyword = pair(s0, s1);
const union line64 key = { { (uint32_t)line, date } }; const union line64 key = { { (uint32_t)line, date } };
size_t i, *pi; size_t i, *pi;
//fixme: verify line <= UINT32_MAX well before this point; `key` above has already cast it to uint32_t.
if(line > UINT32_MAX) if(line > UINT32_MAX)
{ errno = ERANGE; fail = "too many lines of text"; goto catch; } { errno = ERANGE; fail = "too many lines of text"; goto catch; }
if(!(i = pair_map_table_get(&scan->sources.map, keyword))) if(!(i = pair_map_table_get(&scan->sources.map, keyword)))
@ -254,6 +271,7 @@ static int scan_day(struct scan *const scan, union date32 date,
datestr, line, (int)(s1 - s0), s0); datestr, line, (int)(s1 - s0), s0);
} continue; } } continue; }
/* New source. fixme: desc not set. */ /* New source. fixme: desc not set. */
////////
<source> @s0 keyword @s1 ":" [^\x00\n]+ / "\n" => skip { <source> @s0 keyword @s1 ":" [^\x00\n]+ / "\n" => skip {
struct pair keyword = pair(s0, s1); struct pair keyword = pair(s0, s1);
size_t *idx; size_t *idx;
@ -319,20 +337,19 @@ static int scan_day(struct scan *const scan, union date32 date,
goto new_score; goto new_score;
} }
<score_name> * { fail = "name unrecognized"; goto catch; } <score_name> * { fail = "name unrecognized"; goto catch; }
<score_name> ws* @s0 semitext+ (" " semitext+)* @s1 /* ws* */ ";" <score_name> ws* @s0 semilabel @s1 ";" => score_date {
=> score_date {
assert(new_score); assert(new_score);
new_score->name.a = s0, new_score->name.b = s1; new_score->name.a = s0, new_score->name.b = s1;
continue; continue;
} }
<score_date> * { fail = "date unrecognized"; goto catch; } <score_date> * { fail = "date unrecognized"; goto catch; }
<score_date> ws* "~"? @s0 date ws* ";" => score_edges { <score_date> ws* "~"? @s0 date ";" => score_edges {
assert(new_score); assert(new_score);
if(!pair_to_date(s0, &new_score->date)) goto catch; if(!pair_to_date(s0, &new_score->date)) goto catch;
continue; continue;
} }
<score_edges> * { fail = "edges unrecognized"; goto catch; } <score_edges> * { fail = "edges unrecognized"; goto catch; }
<score_edges> ws* "~"? @s0 uint @s1 ws* / "\n" => skip { <score_edges> ws* "~"? @s0 uint @s1 / "\n" => skip {
assert(new_score); assert(new_score);
if(!pair_to_natural(s0, s1, &new_score->edges)) goto catch; if(!pair_to_natural(s0, s1, &new_score->edges)) goto catch;
new_score = 0; /* Done. */ new_score = 0; /* Done. */
@ -343,7 +360,7 @@ static int scan_day(struct scan *const scan, union date32 date,
/* type, reg, launch, how, height, landing, pilot, dual, instr, remarks /* type, reg, launch, how, height, landing, pilot, dual, instr, remarks
eg, [glider] 2-33A; C-GCLK; CYQQ; A; 2000'; CYQQ; ;:13;; Peters D1 */ eg, [glider] 2-33A; C-GCLK; CYQQ; A; 2000'; CYQQ; ;:13;; Peters D1 */
<glider_type> * { fail = "glider type"; goto catch; } <glider_type> * { fail = "glider type"; goto catch; }
<glider_type> ws* @s0 semitext+ @s1 ws* ";" => glider_reg { <glider_type> ws* @s0 semilabel @s1 ";" => glider_reg {
const union line64 key = {{ (uint32_t)line, date }}; const union line64 key = {{ (uint32_t)line, date }};
assert(!new_glider); assert(!new_glider);
if(line > UINT32_MAX) { fail = "line overflow"; goto catch; } if(line > UINT32_MAX) { fail = "line overflow"; goto catch; }
@ -355,10 +372,10 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
<glider_reg> * { fail = "glider reg"; goto catch; } <glider_reg> * { fail = "glider reg"; goto catch; }
<glider_reg> ws* @s0 semitext+ @s1 ws* ";" => glider_launch <glider_reg> ws* @s0 semilabel @s1 ";" => glider_launch
{ new_glider->reg.a = s0, new_glider->reg.b = s1; continue; } { new_glider->reg.a = s0, new_glider->reg.b = s1; continue; }
<glider_launch> * { fail = "glider launch"; goto catch; } <glider_launch> * { fail = "glider launch"; goto catch; }
<glider_launch> ws* @s0 airport @s1 ws* ";" => glider_how { <glider_launch> ws* @s0 airport @s1 ";" => glider_how {
new_glider->launch.a = s0, new_glider->launch.b = s1; new_glider->launch.a = s0, new_glider->launch.b = s1;
fprintf(stderr, "%s[%zu]: glider <<%.*s>> at <<%.*s>>\n", fprintf(stderr, "%s[%zu]: glider <<%.*s>> at <<%.*s>>\n",
datestr, line, (int)(new_glider->reg.b - new_glider->reg.a), datestr, line, (int)(new_glider->reg.b - new_glider->reg.a),
@ -366,7 +383,7 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
<glider_how> * { fail = "glider how"; goto catch; } <glider_how> * { fail = "glider how"; goto catch; }
<glider_how> ws* @s0 [MWA] ws* ";" => glider_height { <glider_how> ws* @s0 [MWA] ";" => glider_height {
switch(*s0) { switch(*s0) {
case 'M': new_glider->how = MotorCarTow; break; case 'M': new_glider->how = MotorCarTow; break;
case 'W': new_glider->how = Winch; break; case 'W': new_glider->how = Winch; break;
@ -375,10 +392,10 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
<glider_height> * { fail = "glider height"; goto catch; } <glider_height> * { fail = "glider height"; goto catch; }
<glider_height> ws* @s0 natural @s1 "'" ws* ";" => glider_landing <glider_height> ws* @s0 natural @s1 "';" => glider_landing
{ if(!pair_to_natural(s0, s1, &new_glider->height_ft)); continue; } { if(!pair_to_natural(s0, s1, &new_glider->height_ft)); continue; }
<glider_landing> * { fail = "glider landing"; goto catch; } <glider_landing> * { fail = "glider landing"; goto catch; }
<glider_landing> ws* @s0 airport @s1 ws* ";" => glider_pilot <glider_landing> ws* @s0 airport @s1 ";" => glider_pilot
{ new_glider->landing.a = s0, new_glider->landing.b = s1; continue;} { new_glider->landing.a = s0, new_glider->landing.b = s1; continue;}
<glider_pilot> * { fail = "glider pilot time"; goto catch; } <glider_pilot> * { fail = "glider pilot time"; goto catch; }
<glider_pilot> ws* ";" => glider_dual /* not PIC */ <glider_pilot> ws* ";" => glider_dual /* not PIC */
@ -402,10 +419,10 @@ static int scan_day(struct scan *const scan, union date32 date,
&new_glider->instr_min)) { fail = "glider instr time"; goto catch; } &new_glider->instr_min)) { fail = "glider instr time"; goto catch; }
continue; } continue; }
<glider_remarks> * { fail = "glider remarks"; goto catch; } <glider_remarks> * { fail = "glider remarks"; goto catch; }
<glider_remarks> ws* "\n" => line <glider_remarks> "\n" => line
{ new_glider->remarks.a = new_glider->remarks.b = 0; { new_glider->remarks.a = new_glider->remarks.b = 0;
new_glider = 0; line++; continue; } new_glider = 0; line++; continue; }
<glider_remarks> ws* @s0 glyph+ (" " glyph+)* @s1 "\n" => line <glider_remarks> ws* @s0 anylabel @s1 "\n" => line
{ new_glider->remarks.a = s0, new_glider->remarks.b = s1; { new_glider->remarks.a = s0, new_glider->remarks.b = s1;
new_glider = 0; line++; continue; } new_glider = 0; line++; continue; }
@ -413,7 +430,7 @@ static int scan_day(struct scan *const scan, union date32 date,
/* type; registration; launch -- landing; pic; sic; /* type; registration; launch -- landing; pic; sic;
single engine day dual; pilot; instrument simulated; actual; remarks */ single engine day dual; pilot; instrument simulated; actual; remarks */
<flight_type> * { fail = "flight type"; goto catch; } <flight_type> * { fail = "flight type"; goto catch; }
<flight_type> ws* @s0 semitext+ @s1 ws* ";" => flight_reg { <flight_type> ws* @s0 semilabel @s1 ";" => flight_reg {
const union line64 key const union line64 key
= {{ (uint32_t)line, {{ date.day, date.month, date.year }} }}; = {{ (uint32_t)line, {{ date.day, date.month, date.year }} }};
assert(!new_flight); assert(!new_flight);
@ -427,7 +444,7 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
<flight_reg> * { fail = "flight reg"; goto catch; } <flight_reg> * { fail = "flight reg"; goto catch; }
<flight_reg> ws* @s0 semitext+ @s1 ws* ";" => flight_airports <flight_reg> ws* @s0 semilabel @s1 ";" => flight_airports
{ new_flight->reg.a = s0, new_flight->reg.b = s1; continue; } { new_flight->reg.a = s0, new_flight->reg.b = s1; continue; }
<flight_airports> * { fail = "flight airports"; goto catch; } <flight_airports> * { fail = "flight airports"; goto catch; }
<flight_airports> ws* @s0 airport @s1 ws* "--" <flight_airports> ws* @s0 airport @s1 ws* "--"
@ -440,14 +457,12 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
<flight_pic> * { fail = "flight pic"; goto catch; } <flight_pic> * { fail = "flight pic"; goto catch; }
<flight_pic> ws* @s0 semitext+ (ws+ semitext+)* @s1 /* ws*? */";" <flight_pic> ws* @s0 semilabel @s1 ";" => flight_sic
=> flight_sic
{ new_flight->pilot.a = s0, new_flight->pilot.b = s1; continue; } { new_flight->pilot.a = s0, new_flight->pilot.b = s1; continue; }
<flight_sic> * { fail = "flight sic"; goto catch; } <flight_sic> * { fail = "flight sic"; goto catch; }
<flight_sic> ws* ";" => flight_dual <flight_sic> ws* ";" => flight_dual
{ new_flight->copilot.a = new_flight->copilot.b = 0; continue; } { new_flight->copilot.a = new_flight->copilot.b = 0; continue; }
<flight_sic> ws* @s0 semitext+ (ws+ semitext+)* @s1 ";" <flight_sic> ws* @s0 semilabel @s1 ";" => flight_dual
=> flight_dual
{ new_flight->copilot.a = s0, new_flight->copilot.b = s1; continue; } { new_flight->copilot.a = s0, new_flight->copilot.b = s1; continue; }
<flight_dual> * { fail = "flight dual time"; goto catch; } <flight_dual> * { fail = "flight dual time"; goto catch; }
<flight_dual> ws* ";" => flight_pilot <flight_dual> ws* ";" => flight_pilot
@ -478,10 +493,10 @@ static int scan_day(struct scan *const scan, union date32 date,
&new_flight->ifr_min)) { fail = "flight ifr time"; goto catch; } &new_flight->ifr_min)) { fail = "flight ifr time"; goto catch; }
continue; } continue; }
<flight_remarks> * { fail = "flight remarks"; goto catch; } <flight_remarks> * { fail = "flight remarks"; goto catch; }
<flight_remarks> ws* "\n" => line <flight_remarks> "\n" => line
{ new_flight->remarks.a = new_flight->remarks.b = 0; { new_flight->remarks.a = new_flight->remarks.b = 0;
new_flight = 0; line++; continue; } new_flight = 0; line++; continue; }
<flight_remarks> ws* @s0 glyph+ (ws+ glyph+)* @s1 "\n" => line <flight_remarks> ws* @s0 anylabel @s1 "\n" => line
{ new_flight->remarks.a = s0, new_flight->remarks.b = s1; { new_flight->remarks.a = s0, new_flight->remarks.b = s1;
new_flight = 0; line++; continue; } new_flight = 0; line++; continue; }
@ -497,7 +512,8 @@ static int scan_day(struct scan *const scan, union date32 date,
<line> "Judges" / kjvlookat => book { book = Judges; continue; } <line> "Judges" / kjvlookat => book { book = Judges; continue; }
<line> "Ruth" / kjvlookat => book { book = Ruth; continue; } <line> "Ruth" / kjvlookat => book { book = Ruth; continue; }
<line> first "Samuel" / kjvlookat => book { book = ISamuel; continue; } <line> first "Samuel" / kjvlookat => book { book = ISamuel; continue; }
<line> second "Samuel" / kjvlookat => book { book = IISamuel; continue; } <line> second "Samuel" / kjvlookat => book
{ book = IISamuel; continue; }
<line> first "Kings" / kjvlookat => book { book = IKings; continue; } <line> first "Kings" / kjvlookat => book { book = IKings; continue; }
<line> second "Kings" / kjvlookat => book { book = IIKings; continue; } <line> second "Kings" / kjvlookat => book { book = IIKings; continue; }
<line> first "Chronicles" / kjvlookat <line> first "Chronicles" / kjvlookat
@ -568,7 +584,7 @@ static int scan_day(struct scan *const scan, union date32 date,
<line> "Revelation" / kjvlookat => book { book = Revelation; continue; } <line> "Revelation" / kjvlookat => book { book = Revelation; continue; }
<book> * { fail = "kjv unrecognized"; goto catch; } <book> * { fail = "kjv unrecognized"; goto catch; }
/* 19:15a, just ignore the a. */ /* 19:15a, just ignore the a. */
<book> ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? { <book> " " @s0 natural @s1 ":" @t0 natural @t1 [ab]? {
if(chapter || verse || verse_end) if(chapter || verse || verse_end)
{ fail = "kjv reference"; goto catch; } { fail = "kjv reference"; goto catch; }
if(!pair_to_natural(s0, s1, &chapter) if(!pair_to_natural(s0, s1, &chapter)
@ -583,7 +599,7 @@ static int scan_day(struct scan *const scan, union date32 date,
{ fail = "kjv range numerical error"; goto catch; } { fail = "kjv range numerical error"; goto catch; }
continue; continue;
} }
<book> ws+ "--" ws+ => skip { <book> " -- " => skip {
if(!chapter || !verse) if(!chapter || !verse)
{ fail = "kjv missing information"; goto catch; } { fail = "kjv missing information"; goto catch; }
if(verse_end && verse_end <= verse) if(verse_end && verse_end <= verse)
@ -608,6 +624,43 @@ static int scan_day(struct scan *const scan, union date32 date,
continue; continue;
} }
/* "<<\ntext\n>>" or "text\n" used by several.
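Must have `future` set on entry; it points at the pair that receives the text and is reset to 0 once the text is stored. */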
<text_input, text_multi> * { fail = "text input"; goto catch; }
<text_input> ws+ { continue; }
<text_input> "\n" => line { // empty is okay
line++;
assert(future);
future->a = future->b = 0, future = 0;
continue;
}
<text_input> "<<\n" @s0 => text_multi { // multi-line
line++;
fprintf(stderr, "$$$ multi-line!\n");
assert(future);
future->a = s0;
continue;
}
<text_input> @s0 anylabel @s1 "\n" => line { // one line
//<text_input> @s0 semilabel @s1 "\n" => line { // one line
line++;
fprintf(stderr, "text: [[%.*s]]\n", (int)(s1 - s0), s0);
assert(future);
future->a = s0, future->b = s1, future = 0;
continue;
}
<text_multi> [^\x00\n] { continue; }
<text_multi> [\x00] { fail = "missing closing \">>\""; goto catch; }
<text_multi> "\n" { line++; continue; }
<text_multi> @s1 ">>\n" => line {
line++;
assert(future && future->a);
future->b = s1;
future = 0;
continue;
}
*/ } */ }
assert(0); /* Never gets here. */ assert(0); /* Never gets here. */
catch: catch:
@ -632,6 +685,10 @@ void scan_(struct scan *const scan) {
pair_map_table_(&scan->places.map); pair_map_table_(&scan->places.map);
place_array_(&scan->places.array); place_array_(&scan->places.array);
linemap_tree_(&scan->documents.dates);
pair_map_table_(&scan->documents.map);
source_array_(&scan->documents.array);
linemap_tree_(&scan->sources.dates); linemap_tree_(&scan->sources.dates);
pair_map_table_(&scan->sources.map); pair_map_table_(&scan->sources.map);
source_array_(&scan->sources.array); source_array_(&scan->sources.array);
@ -650,6 +707,8 @@ struct scan scan(struct journal *const jrnl) {
struct source *nul; struct source *nul;
if(!(nul = source_array_new(&scan.sources.array))) goto catch; if(!(nul = source_array_new(&scan.sources.array))) goto catch;
*nul = (struct source){0}; *nul = (struct source){0};
if(!(nul = source_array_new(&scan.documents.array))) goto catch;
*nul = (struct source){0};
} }
{ {
struct place *nul; struct place *nul;