From c096fbf6052e4705a068b89305a9033d4ed842a0 Mon Sep 17 00:00:00 2001 From: Neil Date: Sat, 6 May 2023 18:11:23 -0700 Subject: [PATCH] Works with text. --- src/scan.h | 3 +- src/scan.re.c | 139 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 100 insertions(+), 42 deletions(-) diff --git a/src/scan.h b/src/scan.h index 7f7318c..8bb3fb1 100644 --- a/src/scan.h +++ b/src/scan.h @@ -85,7 +85,7 @@ struct scan { struct source_array array; struct pairmap_table map; struct linemap_tree dates; - } sources; + } sources, documents; struct { struct place_array array; struct pairmap_table map; @@ -96,7 +96,6 @@ struct scan { struct pairmap_table map; struct linemap_tree dates; } scores; - struct source_array documents; struct glider_tree gliders; struct flight_tree flights; struct kjv_tree kjvs; diff --git a/src/scan.re.c b/src/scan.re.c index 668f248..26d500d 100644 --- a/src/scan.re.c +++ b/src/scan.re.c @@ -109,6 +109,7 @@ static int scan_day(struct scan *const scan, union date32 date, size_t line = 1; char datestr[12] = {0}; const char *fail = "perhaps a bat?"; + struct pair *future = 0; struct score *new_score = 0; struct glider *new_glider = 0; struct flight *new_flight = 0; @@ -129,12 +130,18 @@ static int scan_day(struct scan *const scan, union date32 date, ws = [ \t]; glyph = [^\x00-\x20\x7f] | [\x80-\xff]; // [^\x00\n\t ] + all weird - semitext = glyph \ ";"; - labelchar = glyph \ [;]; // perhaps? - label_mbra = labelchar \ [[\]]; - label_mpar = labelchar \ [()]; - label_in = label_mbra+ (" " label_mbra+)*; // "[label in; ...]" - label_out = (label_mpar labelchar*) (" " labelchar+)*; // "() label out" + semichar = glyph \ ";"; + brachar = semichar \ [[\]]; + parchar = glyph \ [()]; + + // label: possibly separated by spaces; used in freestanding last + anylabel = glyph+ (ws+ glyph+)*; + // label except ";"; used in freestanding + semilabel = semichar+ (ws+ semichar+)*; + // label except ";[]"; used in [] + bralabel = brachar+ (ws+ brachar+)*; + // label except start "()"; used in location + parlabel = (parchar glyph*) (ws+ glyph+)*; keyword = [A-Za-z0-9][A-Za-z0-9_-]*; @@ -165,24 +172,33 @@ static int scan_day(struct scan *const scan, union date32 date, "[flight]" :=> flight_type "[" :=> bracket + /* ^"[" ... */ * { fail = "bracket unrecognized"; goto catch; } "document: " :=> document_title - // "<<\ntext\n>>" or "text\n" used by several. - * { fail = "text"; goto catch; } - ws+ :=> text - glyph+ { - } - * { fail = "document title"; goto catch; } - @s0 label_in @s1 "]" => text { - const struct pair label = pair(s0, s1); - fprintf(stderr, "document: <<%.*s>>?\n", (int)(s1 - s0), s0); + @s0 bralabel @s1 "]" => text_input { + const union line64 key = { { (uint32_t)line, date } }; + size_t *pi; + struct source *doc; + fprintf(stderr, "document: <<%.*s>>\n", (int)(s1 - s0), s0); + if(!(doc = source_array_new(&scan->documents.array))) goto catch; + doc->name.a = s0, doc->name.b = s1; + doc->desc.a = 0, doc->desc.b = 0; + assert(!future), future = &doc->desc; + switch(linemap_tree_bulk_assign(&scan->documents.dates, key, &pi)) { + case TREE_PRESENT: fail = "duplicate"; case TREE_ERROR: goto catch; + case TREE_ABSENT: + *pi = (size_t)(doc - scan->documents.array.data); break; + } + fprintf(stderr, "%s[%zu]: new document <<%.*s>> stored at %zu.\n", + datestr, line, (int)(s1 - s0), s0, *pi); + assert(future); continue; } * { fail = "place unrecognized"; goto catch; } - @s0 label_out @s1 / "\n" => skip { also_place: { + @s0 parlabel @s1 / "\n" => skip { also_place: { const struct pair keyword = pair(s0, s1); const union line64 key = { { (uint32_t)line, date } }; size_t i, *pi; @@ -200,7 +216,7 @@ static int scan_day(struct scan *const scan, union date32 date, datestr, line, (int)(s1 - s0), s0); } continue; } "(" @t0 decimal "," @t1 decimal ")" - ws+ @s0 label_out @s1 / "\n" => skip { + ws+ @s0 parlabel @s1 / "\n" => skip { const struct pair keyword = pair(s0, s1); const double x = strtod(t0, 0), y = strtod(t1, 0); /* Safe? */ size_t *idx; @@ -242,6 +258,7 @@ static int scan_day(struct scan *const scan, union date32 date, const struct pair keyword = pair(s0, s1); const union line64 key = { { (uint32_t)line, date } }; size_t i, *pi; + //fixme: verify way before if(line > UINT32_MAX) { errno = ERANGE; fail = "too many lines of text"; goto catch; } if(!(i = pair_map_table_get(&scan->sources.map, keyword))) @@ -254,6 +271,7 @@ static int scan_day(struct scan *const scan, union date32 date, datestr, line, (int)(s1 - s0), s0); } continue; } /* New source. fixme: desc not set. */ +//////// @s0 keyword @s1 ":" [^\x00\n]+ / "\n" => skip { struct pair keyword = pair(s0, s1); size_t *idx; @@ -319,20 +337,19 @@ static int scan_day(struct scan *const scan, union date32 date, goto new_score; } * { fail = "name unrecognized"; goto catch; } - ws* @s0 semitext+ (" " semitext+)* @s1 /* ws* */ ";" - => score_date { + ws* @s0 semilabel @s1 ";" => score_date { assert(new_score); new_score->name.a = s0, new_score->name.b = s1; continue; } * { fail = "date unrecognized"; goto catch; } - ws* "~"? @s0 date ws* ";" => score_edges { + ws* "~"? @s0 date ";" => score_edges { assert(new_score); if(!pair_to_date(s0, &new_score->date)) goto catch; continue; } * { fail = "edges unrecognized"; goto catch; } - ws* "~"? @s0 uint @s1 ws* / "\n" => skip { + ws* "~"? @s0 uint @s1 / "\n" => skip { assert(new_score); if(!pair_to_natural(s0, s1, &new_score->edges)) goto catch; new_score = 0; /* Done. */ @@ -343,7 +360,7 @@ static int scan_day(struct scan *const scan, union date32 date, /* type, reg, launch, how, height, landing, pilot, dual, instr, remarks eg, [glider] 2-33A; C-GCLK; CYQQ; A; 2000'; CYQQ; ;:13;; Peters D1 */ * { fail = "glider type"; goto catch; } - ws* @s0 semitext+ @s1 ws* ";" => glider_reg { + ws* @s0 semilabel @s1 ";" => glider_reg { const union line64 key = {{ (uint32_t)line, date }}; assert(!new_glider); if(line > UINT32_MAX) { fail = "line overflow"; goto catch; } @@ -355,10 +372,10 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } * { fail = "glider reg"; goto catch; } - ws* @s0 semitext+ @s1 ws* ";" => glider_launch + ws* @s0 semilabel @s1 ";" => glider_launch { new_glider->reg.a = s0, new_glider->reg.b = s1; continue; } * { fail = "glider launch"; goto catch; } - ws* @s0 airport @s1 ws* ";" => glider_how { + ws* @s0 airport @s1 ";" => glider_how { new_glider->launch.a = s0, new_glider->launch.b = s1; fprintf(stderr, "%s[%zu]: glider <<%.*s>> at <<%.*s>>\n", datestr, line, (int)(new_glider->reg.b - new_glider->reg.a), @@ -366,7 +383,7 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } * { fail = "glider how"; goto catch; } - ws* @s0 [MWA] ws* ";" => glider_height { + ws* @s0 [MWA] ";" => glider_height { switch(*s0) { case 'M': new_glider->how = MotorCarTow; break; case 'W': new_glider->how = Winch; break; @@ -375,10 +392,10 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } * { fail = "glider height"; goto catch; } - ws* @s0 natural @s1 "'" ws* ";" => glider_landing + ws* @s0 natural @s1 "';" => glider_landing { if(!pair_to_natural(s0, s1, &new_glider->height_ft)); continue; } * { fail = "glider landing"; goto catch; } - ws* @s0 airport @s1 ws* ";" => glider_pilot + ws* @s0 airport @s1 ";" => glider_pilot { new_glider->landing.a = s0, new_glider->landing.b = s1; continue;} * { fail = "glider pilot time"; goto catch; } ws* ";" => glider_dual /* not PIC */ @@ -402,10 +419,10 @@ static int scan_day(struct scan *const scan, union date32 date, &new_glider->instr_min)) { fail = "glider instr time"; goto catch; } continue; } * { fail = "glider remarks"; goto catch; } - ws* "\n" => line + "\n" => line { new_glider->remarks.a = new_glider->remarks.b = 0; new_glider = 0; line++; continue; } - ws* @s0 glyph+ (" " glyph+)* @s1 "\n" => line + ws* @s0 anylabel @s1 "\n" => line { new_glider->remarks.a = s0, new_glider->remarks.b = s1; new_glider = 0; line++; continue; } @@ -413,7 +430,7 @@ static int scan_day(struct scan *const scan, union date32 date, /* type; registration; launch -- landing; pic; sic; single engine day dual; pilot; instrument simulated; actual; remarks */ * { fail = "flight type"; goto catch; } - ws* @s0 semitext+ @s1 ws* ";" => flight_reg { + ws* @s0 semilabel @s1 ";" => flight_reg { const union line64 key = {{ (uint32_t)line, {{ date.day, date.month, date.year }} }}; assert(!new_flight); @@ -427,7 +444,7 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } * { fail = "flight reg"; goto catch; } - ws* @s0 semitext+ @s1 ws* ";" => flight_airports + ws* @s0 semilabel @s1 ";" => flight_airports { new_flight->reg.a = s0, new_flight->reg.b = s1; continue; } * { fail = "flight airports"; goto catch; } ws* @s0 airport @s1 ws* "--" @@ -440,14 +457,12 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } * { fail = "flight pic"; goto catch; } - ws* @s0 semitext+ (ws+ semitext+)* @s1 /* ws*? */";" - => flight_sic + ws* @s0 semilabel @s1 ";" => flight_sic { new_flight->pilot.a = s0, new_flight->pilot.b = s1; continue; } * { fail = "flight sic"; goto catch; } ws* ";" => flight_dual { new_flight->copilot.a = new_flight->copilot.b = 0; continue; } - ws* @s0 semitext+ (ws+ semitext+)* @s1 ";" - => flight_dual + ws* @s0 semilabel @s1 ";" => flight_dual { new_flight->copilot.a = s0, new_flight->copilot.b = s1; continue; } * { fail = "flight dual time"; goto catch; } ws* ";" => flight_pilot @@ -478,10 +493,10 @@ static int scan_day(struct scan *const scan, union date32 date, &new_flight->ifr_min)) { fail = "flight ifr time"; goto catch; } continue; } * { fail = "flight remarks"; goto catch; } - ws* "\n" => line + "\n" => line { new_flight->remarks.a = new_flight->remarks.b = 0; new_flight = 0; line++; continue; } - ws* @s0 glyph+ (ws+ glyph+)* @s1 "\n" => line + ws* @s0 anylabel @s1 "\n" => line { new_flight->remarks.a = s0, new_flight->remarks.b = s1; new_flight = 0; line++; continue; } @@ -497,7 +512,8 @@ static int scan_day(struct scan *const scan, union date32 date, "Judges" / kjvlookat => book { book = Judges; continue; } "Ruth" / kjvlookat => book { book = Ruth; continue; } first "Samuel" / kjvlookat => book { book = ISamuel; continue; } - second "Samuel" / kjvlookat => book { book = IISamuel; continue; } + second "Samuel" / kjvlookat => book + { book = IISamuel; continue; } first "Kings" / kjvlookat => book { book = IKings; continue; } second "Kings" / kjvlookat => book { book = IIKings; continue; } first "Chronicles" / kjvlookat @@ -568,7 +584,7 @@ static int scan_day(struct scan *const scan, union date32 date, "Revelation" / kjvlookat => book { book = Revelation; continue; } * { fail = "kjv unrecognized"; goto catch; } /* 19:15a, just ignore the a. */ - ws+ @s0 natural @s1 ":" @t0 natural @t1 [ab]? { + " " @s0 natural @s1 ":" @t0 natural @t1 [ab]? { if(chapter || verse || verse_end) { fail = "kjv reference"; goto catch; } if(!pair_to_natural(s0, s1, &chapter) @@ -583,7 +599,7 @@ static int scan_day(struct scan *const scan, union date32 date, { fail = "kjv range numerical error"; goto catch; } continue; } - ws+ "--" ws+ => skip { + " -- " => skip { if(!chapter || !verse) { fail = "kjv missing information"; goto catch; } if(verse_end && verse_end <= verse) @@ -608,6 +624,43 @@ static int scan_day(struct scan *const scan, union date32 date, continue; } + + /* "<<\ntext\n>>" or "text\n" used by several. + Must have future and */ + * { fail = "text input"; goto catch; } + ws+ { continue; } + "\n" => line { // empty is okay + line++; + assert(future); + future->a = future->b = 0, future = 0; + continue; + } + "<<\n" @s0 => text_multi { // multi-line + line++; + fprintf(stderr, "$$$ multi-line!\n"); + assert(future); + future->a = s0; + continue; + } + @s0 anylabel @s1 "\n" => line { // one line + // @s0 semilabel @s1 "\n" => line { // one line + line++; + fprintf(stderr, "text: [[%.*s]]\n", (int)(s1 - s0), s0); + assert(future); + future->a = s0, future->b = s1, future = 0; + continue; + } + [^\x00\n] { continue; } + [\x00] { fail = "missing closing \">>\""; goto catch; } + "\n" { line++; continue; } + @s1 ">>\n" => line { + line++; + assert(future && future->a); + future->b = s1; + future = 0; + continue; + } + */ } assert(0); /* Never gets here. */ catch: @@ -632,6 +685,10 @@ void scan_(struct scan *const scan) { pair_map_table_(&scan->places.map); place_array_(&scan->places.array); + linemap_tree_(&scan->documents.dates); + pair_map_table_(&scan->documents.map); + source_array_(&scan->documents.array); + linemap_tree_(&scan->sources.dates); pair_map_table_(&scan->sources.map); source_array_(&scan->sources.array); @@ -650,6 +707,8 @@ struct scan scan(struct journal *const jrnl) { struct source *nul; if(!(nul = source_array_new(&scan.sources.array))) goto catch; *nul = (struct source){0}; + if(!(nul = source_array_new(&scan.documents.array))) goto catch; + *nul = (struct source){0}; } { struct place *nul;