Too complex substring.

This commit is contained in:
Neil 2023-02-02 21:31:24 -08:00
parent f3c63b0d45
commit b683b1f3ec
7 changed files with 87 additions and 246 deletions

View File

@ -36,6 +36,7 @@ default: $(projects)
# success
bin/test-text: build/text.o build/test_text.o
bin/test-strings: build/substrings.o build/test_substrings.o
bin/test-journal: build/text.o build/journal.o build/test_journal.o
bin/test-source: build/text.o build/journal.o build/source.o build/test_source.o
bin/test-kjv: build/text.o build/kjv.o build/test_kjv.o

View File

@ -1,8 +1,9 @@
#include <stddef.h>
#include <errno.h>
#include <stdint.h>
/** `printf`-compatible substring. */
struct substring { const char *sub; int size; };
struct substring { const char *sub; size_t size; };
/** Parse unsigned; [`s`,`e`) => `n`. */
static int helper_natural(const char *s, const char *const e, uint32_t *const n)
@ -18,7 +19,15 @@ static int helper_natural(const char *s, const char *const e, uint32_t *const n)
return 1;
}
/** Bernstein's djb2 string hash, <http://www.cse.yorku.ca/~oz/hash.html>.
 @param[s] Null-terminated string; it is read as unsigned bytes and must not
 be null.
 @return The 32-bit hash: starts at 5381 and, for every byte up to the
 terminator, becomes `hash * 33 + byte` (wrapping modulo 2^32). */
static uint32_t djb2(const char *s) {
    const unsigned char *byte;
    uint32_t h = 5381;
    for(byte = (const unsigned char *)s; *byte != '\0'; byte++)
        h = h * 33u + (uint32_t)*byte;
    return h;
}
/* `unused_helper` calls the static helpers above and `unused_helper_coda`,
 which in turn calls `unused_helper`. Presumably the pair exists only so that
 every static function is referenced and the compiler stays quiet about unused
 functions; nothing visible here calls either of them -- confirm. */
static void unused_helper_coda(void);
static void unused_helper(void)
{ helper_natural(0, 0, 0); unused_helper_coda(); }
{ helper_natural(0, 0, 0); djb2(0); unused_helper_coda(); }
static void unused_helper_coda(void) { unused_helper(); }

35
src/lorem.c Normal file
View File

@ -0,0 +1,35 @@
/** @license 2023 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
Temporary strings duplicated from substrings.
@std C89 */
#include "lorem.h"
#include <assert.h>
#include <errno.h>
/*#define ARRAY_NAME block
#define ARRAY_TYPE char *
#include "array.h"*/
/* Parameters for the "pool.h" include. The code in this file relies on that
 include providing `struct char_pool`, `char_pool_buffer` and `char_pool_`. */
#define POOL_NAME char
#define POOL_TYPE char
#include "pool.h"
/* The single pool that every string returned by `lorem_dup` is carved from. */
static struct char_pool pool;
/** Copies `sub` into the module's pool as a null-terminated string.
 @param[sub] Substring to duplicate; `sub.size` bytes are read from `sub.sub`.
 @return The copy, stored in the pool's first slot, or null on failure.
 @throws[EDOM] `sub.sub` is null.
 @throws[ERANGE] `sub.size` is the maximum `size_t`, so there is no room for
 the terminator.
 A null return from `char_pool_buffer` is passed on without touching `errno`
 here. */
const char *lorem_dup(const struct substring sub) {
    char *copy;
    size_t need;
    if(!sub.sub) { errno = EDOM; return 0; }
    if(sub.size == ~(size_t)0) { errno = ERANGE; return 0; }
    /* Room for the characters plus the terminating null. */
    need = sub.size + 1;
    if(!char_pool_buffer(&pool, need)) return 0;
    assert(pool.capacity0 - pool.slots.data[0].size >= need);
    /* Append directly after whatever slot 0 already holds. */
    copy = pool.slots.data[0].slab + pool.slots.data[0].size;
    memcpy(copy, sub.sub, sub.size);
    copy[sub.size] = '\0';
    pool.slots.data[0].size += need;
    return copy;
}
/** Hands the module's pool to `char_pool_`. Presumably this releases every
 string `lorem_dup` has returned -- confirm against pool.h. */
void lorem_(void) {
    char_pool_(&pool);
}

4
src/lorem.h Normal file
View File

@ -0,0 +1,4 @@
#ifndef LOREM_H /* Guard: a second inclusion must not pull in helper.h again. */
#define LOREM_H

#include "helper.h" /* struct substring */

/** Duplicates `sub` as a null-terminated string held in storage owned by
 lorem.c.
 @return The copy, or null on failure: `errno` is EDOM when `sub.sub` is null
 and ERANGE when `sub.size` is the maximum `size_t`. */
const char *lorem_dup(const struct substring sub);

/** Passes lorem.c's string pool to `char_pool_`; presumably strings obtained
 from `lorem_dup` must not be used afterwards -- confirm against pool.h. */
void lorem_(void);

#endif /* LOREM_H */

View File

@ -60,64 +60,6 @@ set datafile separator comma
plot $Data u 2:($3) smooth cumulative with steps lw 2 lc "red" ti "cumulative count"
### end of code
*/
/** Walks every page of `journal` and every lexeme in each page's `meaning`
 array, checking that the KJV_* symbols arrive in an allowed order and printing
 them as it goes; finally prints how many KJV_TEXT lexemes were seen.
 @param[journal] Tree of pages, iterated with `page_tree_begin` and
 `page_tree_next`.
 @return 1 on success. 0 with `errno` set to EILSEQ, after a report on
 `stderr`, when a symbol arrives in a state that does not allow it. */
static int bible_graph(/*const*/ struct page_tree *const journal) {
/* Where we are inside a reference. NOTE(review): nothing in this function
 assigns NEXT, so `state` is never NEXT when it is tested below -- confirm
 whether KJV_NEXT was meant to set it. */
enum { CHILL, BOOK, CHAPTER, WORD, NEXT } state = CHILL;
/* `entry` and `lex` are declared at function scope so that `catch` can report
 the date and line at which the walk stopped. */
struct page_tree_entry entry = { 0, 0 };
struct lex *lex = 0;
/* Number of KJV_TEXT lexemes seen. */
size_t count = 0;
for(struct page_tree_iterator p_it = page_tree_begin(journal);
(entry = page_tree_next(&p_it)).key; ) {
struct page *const page = entry.value;
for(struct lex_array_iterator l_it = lex_array_iterator(&page->meaning);
(lex = lex_array_next(&l_it)); ) {
switch(lex->symbol) {
/* A book may start from rest or straight after text. It is written to
 `stderr` together with the page's date, unlike the other fields, which go to
 `stdout`. */
case KJV_BOOK:
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n");
fprintf(stderr, "%d-%.2d-%.2d: \"%.*s\", ",
entry.key->year, entry.key->month, entry.key->day,
(int)(lex->s1 - lex->s0), lex->s0);
state = BOOK;
break;
/* Must directly follow a book. Prints the chapter/verse in quotes and then
 opens another quote that the following text is printed inside. */
case KJV_CHAPTER_VERSE:
if(state != BOOK) goto catch;
printf("\"%.*s\", \"", (int)(lex->s1 - lex->s0), lex->s0);
state = CHAPTER;
break;
/* Only allowed after text: closes the quote and ends the line. `state` is
 left as WORD. */
case KJV_NEXT:
if(state != WORD) goto catch;
printf("\"\n");
break;
/* NOTE(review): this output looks like leftover debugging -- it prefixes "*"
 when the previous lexeme was also text, substitutes a precision of 10 when the
 length is negative, and appends the length in angle brackets. */
case KJV_TEXT:
if(state != WORD && state != CHAPTER && state != NEXT)
goto catch;
printf("%s%.*s<%d>", state == WORD ? "*" : "",
(int)(lex->s1 - lex->s0 < 0 ? 10 : lex->s1 - lex->s0),
lex->s0, (int)(lex->s1 - lex->s0));
count++;
state = WORD;
break;
/* Any other symbol is allowed only at rest or after text; after text it closes
 the quote, ends the line and returns to rest. */
default:
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\"\n"), state = CHILL;
break;
}
}
/* End of page: a book or chapter must not be left without its text. Note that
 here only a newline is printed, without the closing quote used above. */
if(state != CHILL && state != WORD) goto catch;
if(state == WORD) printf("\n"), state = CHILL;
}
printf("Count: %lu.\n", (unsigned long)count);
return 1;
catch:
/* Report as much of the position as is known, then fail with EILSEQ. */
fprintf(stderr, "Bible error.\n");
if(entry.key) {
fprintf(stderr, "On date: %d-%.2d-%.2d.\n",
entry.key->year, entry.key->month, entry.key->day);
if(lex) fprintf(stderr, "At line %lu.\n", (unsigned long)lex->line);
}
errno = EILSEQ;
return 0;
}
#define C_BLACK "\033[0;30m"
#define C_RED "\033[0;31m"
@ -139,190 +81,6 @@ catch:
#define C_RESET "\033[0m"
/** Reads a journal stored as <year>/<month>/<day>.txt beneath `argv[1]`, lexes
 each day's file into a `page` kept in a `page_tree` keyed by date while
 echoing the lexemes, then runs `bible_graph` over the whole journal.
 @param[argc, argv] Exactly one argument is required: the journal directory.
 @return EXIT_SUCCESS, or EXIT_FAILURE after `perror` (plus `intent`, when one
 was recorded). */
int main(int argc, char **argv) {
int success = EXIT_SUCCESS;
/* Optional human-readable reason, printed after `perror` on failure. */
char *intent = 0;
/* For reading in files, overwritten. */
DIR *dir = 0;
struct dirent *de;
/* Numeric names found at each directory level; `months` and `days` are
 cleared after each parent directory has been processed. */
struct int_array years = int_array(), months = int_array(),
days = int_array();
int *y, *y_end;
struct page_tree journal = page_tree();
errno = 0;
if(argc != 2) { intent = "needs journal location, which should"
" contain <year>/<month>/<day>.txt"; goto catch; }
/* Get the years list as directories matching a year. */
if(chdir(argv[1]) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int year, *p;
if(!lex_looks_like_year(de->d_name, &year)) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&years))) goto catch;
*p = year;
}
closedir(dir), dir = 0;
/* Sort the years for sensible ordering of parsing. */
qsort(years.data, years.size, sizeof *years.data, &void_int_cmp);
fprintf(stderr, "Years in <<%s>>: %s.\n",
argv[1], int_array_to_string(&years));
/* Go through each year. */
for(y = years.data, y_end = y + years.size; y < y_end; y++) {
/* Reused for the year, month and day-file names in turn. */
char fn[64];
int *m, *m_end;
sprintf(fn, "%d", *y);
/* Get the months as directories. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int month, *p;
if(!(month = lex_looks_like_month(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
if(!S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&months))) goto catch;
*p = month;
}
closedir(dir), dir = 0;
qsort(months.data, months.size, sizeof *months.data, &void_int_cmp);
/* NOTE(review): this message ends in ".)" whereas the matching "Years" and
 "Days" messages end in "." -- the parenthesis looks like a typo. */
fprintf(stderr, "Months in <<%s>>: %s.)\n",
fn, int_array_to_string(&months));
/* Go through each month. */
for(m = months.data, m_end = m + months.size; m < m_end; m++) {
int *d, *d_end;
sprintf(fn, "%.2d", *m);
/* Get the days as files. */
if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch;
while((de = readdir(dir))) {
struct stat st;
int day, *p;
/* fixme: Have yyyy-mm-dd to figure out how many days. */
if(!(day = lex_looks_like_day(de->d_name))) continue;
if(stat(de->d_name, &st)) goto catch;
/* Unlike years and months, a day is anything that is not a directory. */
if(S_ISDIR(st.st_mode)) continue;
if(!(p = int_array_new(&days))) goto catch;
*p = day;
}
closedir(dir), dir = 0;
qsort(days.data, days.size, sizeof *days.data, &void_int_cmp);
fprintf(stderr, "Days in <<%s>>: %s.\n",
fn, int_array_to_string(&days));
/* Lex each day's file into a new page of the journal. */
for(d = days.data, d_end = d + days.size; d < d_end; d++) {
/* Both start null so that `syntax` can tell how far this day got. */
struct lex *lex = 0;
struct page *page = 0;
union date32 d32;
if(!(d32 = date_to_32(*y, *m, *d)).year) { errno = EILSEQ;
intent = "date parse error"; goto syntax; }
sprintf(fn, "%.2d.txt", *d);
/* Anything but TREE_UNIQUE is a failure; when `errno` was not set, it is
 explained as a duplicate date. */
if(page_tree_bulk_add(&journal, d32, &page) != TREE_UNIQUE) {
if(!errno) intent = "not unique", errno = EDOM;
goto syntax;
}
page->entry = char_array();
page->meaning = lex_array();
/* Presumably loads the contents of the file `fn` into `page->entry`. */
if(!append_file(&page->entry, fn)) goto syntax;
/* Suppresses the separating space before the first TEXT lexeme. */
int first = 1;
for(lex_reset(page->entry.data); ; ) {
if(!(lex = lex_array_new(&page->meaning))) goto syntax;
/* A false return with a symbol other than END is a lexing error. */
if(!lex_next(lex)) {
if(lex->symbol != END) { errno = EILSEQ; goto syntax; }
break; /* Terminated successfully. */
}
/* Echo the lexeme: recognised symbols to stdout, everything else to stderr
 with its line number and symbol name. */
switch(lex->symbol) {
case TEXT: printf("%s%.*s",
first ? "" : " ", (int)(lex->s1 - lex->s0), lex->s0);
first = 0; break;
case PARAGRAPH: printf("\n" C_RESET); break;
case KJV_BOOK: printf(C_YELLOW "%.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_CHAPTER_VERSE: printf(" ch. %.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_TEXT: printf("%.*s",
(int)(lex->s1 - lex->s0), lex->s0); break;
case KJV_NEXT: printf("(next)\n"); break;
default:
fprintf(stderr, "%lu: %s",
(unsigned long)lex->line, lex_symbols[lex->symbol]);
if(lex->s0 && lex->s1) {
/* The `%.*s` precision must fit an int. NOTE(review): `lex->s0 + INT_MAX` may
 point past the end of the buffer; comparing `lex->s1 - lex->s0` against
 INT_MAX would avoid forming that pointer -- confirm. */
if(lex->s0 + INT_MAX < lex->s1)
intent = "line too long", errno = EILSEQ;
else
fprintf(stderr, " <<%.*s>>",
(int)(lex->s1 - lex->s0), lex->s0);
}
fprintf(stderr, ".\n");
break;
}
}
continue;
/* Reached only by `goto` from the body above: report the date and whatever
 was read or lexed for this day, then fail. */
syntax:
fprintf(stderr, "On date: %d-%.2d-%.2d.\n", *y, *m, *d);
if(!page) goto catch;
if(!lex) { fprintf(stderr, "While parsing <<<\n%s>>>.\n",
page->entry.data); goto catch; }
for(struct lex_array_iterator it
= lex_array_iterator(&page->meaning);
lex = lex_array_next(&it); ) {
fprintf(stderr, "%lu: %s",
(unsigned long)lex->line, lex_symbols[lex->symbol]);
if(lex->s0 && lex->s1) {
if(lex->s0 + INT_MAX < lex->s1)
intent = "line too long", errno = EILSEQ;
else
fprintf(stderr, " <<%.*s>>",
(int)(lex->s1 - lex->s0), lex->s0);
}
fprintf(stderr, ".\n");
}
goto catch;
}
int_array_clear(&days);
if(chdir("..") == -1) goto catch;
}
int_array_clear(&months);
if(chdir("..") == -1) goto catch;
/* fixme: Expand, contact is the next thing that it doesn't get. */
/* Stops after the year 1993, so later years are never read. */
if(*y == 1993/*1996*/) break;
}
page_tree_bulk_finish(&journal);
int_array_(&years), int_array_(&months), int_array_(&days);
fprintf(stderr, "Journal has entries: %s\n", page_tree_to_string(&journal));
/* Do something interesting? */
if(!bible_graph(&journal)) goto catch;
goto finally;
catch:
success = EXIT_FAILURE;
perror("interpret");
if(intent) fprintf(stderr, "Further explanation: %s.\n", intent);
finally:
if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir");
/* NOTE(review): on the success path `int_array_` has already been called on
 these three arrays above; presumably a second call is harmless -- confirm. */
int_array_(&years), int_array_(&months), int_array_(&days);
struct page_tree_entry entry;
/* Pass each page's arrays to their `_` functions (presumably destructors).
 NOTE(review): nothing visible here releases `journal` itself -- confirm
 whether the tree needs its own call. */
for(struct page_tree_iterator it = page_tree_begin(&journal);
(entry = page_tree_next(&it)).key; ) {
struct page *const page = entry.value;
/* NOTE(review): `z` is filled in by `date32_to_string` but never read. */
char z[12];
date32_to_string(*entry.key, &z);
lex_array_(&page->meaning);
char_array_(&page->entry);
}
return success;
}
/* Popular KJV 738137 words. */
struct scan scan(const char *const buffer) {

View File

@ -5,11 +5,25 @@
#include "../src/source.h" /* base */
#include "../src/journal.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/* This is a lookup table for source strings ("2000glider") to substring the
first description ("Glider pilot log book"). */
/** Key-equality callback for the lookup table.
 @return 1 when the strings `x` and `y` compare equal with `strcmp`, else 0. */
static int lookup_is_equal(const char *const x, const char *const y) {
    return strcmp(x, y) == 0;
}
/** Hash callback for the lookup table: the `djb2` hash of the key string. */
static uint32_t lookup_hash(const char *const x) {
    return djb2(x);
}
/** To-string callback for the lookup table. Writes at most the first 11
 characters of the key `x`, plus the terminator, into the 12-byte buffer `a`;
 the description `desc` is not printed. */
static void lookup_to_string(const char *x, const struct substring desc,
    char (*const a)[12]) {
    char *const out = *a;
    (void)desc;
    sprintf(out, "%.11s", x);
}
/* Given to the table as TABLE_DEFAULT: a null substring of size zero. */
static struct substring lookup_default = { 0, 0 };
/* Parameters for the "../src/table.h" include below. Presumably it generates a
 `lookup` table from `char *` keys to `struct substring` values that uses the
 `lookup_hash`, `lookup_is_equal` and `lookup_to_string` callbacks defined
 above -- confirm against table.h. */
#define TABLE_NAME lookup
#define TABLE_KEY char *
#define TABLE_UINT uint32_t
#define TABLE_VALUE struct substring
#define TABLE_DEFAULT lookup_default
#define TABLE_TO_STRING
#include "../src/table.h"
static void source_to_string(const union line64 line, const struct substring *u,

20
test/test_substrings.c Normal file
View File

@ -0,0 +1,20 @@
#include "../src/lorem.h"
#include <stdlib.h>
#include <stdio.h>
int main(void) {
int success = EXIT_SUCCESS;
const char lorem[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean tincidunt leo neque. Integer vel bibendum lectus, a vulputate dolor. Vivamus vestibulum quam ut euismod aliquet. Vivamus vel pulvinar felis, eu dictum lorem. Integer scelerisque lobortis orci nec tincidunt. Mauris vulputate ipsum non tempus tincidunt. Pellentesque nec iaculis dolor. Curabitur bibendum pretium dui euismod tincidunt. In cursus, libero et porta placerat, ante ante accumsan lacus, nec sollicitudin ex elit nec lectus. Sed nisi sem, rhoncus sed nulla et, faucibus feugiat eros.";
struct substring s;
const char *a;
s.sub = lorem + 6, s.size = 5;
if(!(a = lorem_dup(s))) goto catch;
printf("lorem: <%s>\n", a);
goto finally;
catch:
success = EXIT_FAILURE;
perror("text");
finally:
lorem_();
return success;
}